Hadoop: reading a file on the cluster from Linux

package com.wh.util;

import java.io.ByteArrayOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFSUtil {

    static Configuration hadoopConf = new Configuration();

    public static byte[] readFromFileToByteArray(String srcFile) throws Exception {
        if (srcFile == null || srcFile.trim().length() == 0) {
            throw new Exception("srcFile is empty");
        }
        // Handle to the cluster file system
        FileSystem fs = FileSystem.get(hadoopConf);
        // Path object for the file on HDFS
        Path hdfsPath = new Path(srcFile);
        FSDataInputStream hdfsInStream = fs.open(hdfsPath);

        // Buffer and output stream that collects the file contents
        byte[] byteArray = new byte[65535];
        ByteArrayOutputStream bos = new ByteArrayOutputStream();

        // Read loop: write only the readLen bytes actually filled on each pass;
        // writing the whole buffer would append stale bytes from earlier reads
        int readLen = 0;
        while ((readLen = hdfsInStream.read(byteArray)) > 0) {
            bos.write(byteArray, 0, readLen);
        }
        hdfsInStream.close();
        return bos.toByteArray();
    }

    public static String readFromFile(String srcFile) throws Exception {
        if (srcFile == null || srcFile.trim().length() == 0) {
            throw new Exception("srcFile is empty");
        }
        byte[] byteArray = readFromFileToByteArray(srcFile);
        if (byteArray == null || byteArray.length == 0) {
            return null;
        }
        return new String(byteArray, "utf-8");
    }

    public static void main(String[] args) throws Exception {
        System.out.print(readFromFile("/path/to/file"));
    }
}
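
As an aside, Hadoop ships a helper that performs the same buffered copy, so the read loop does not have to be hand-rolled. Below is a minimal sketch (not from the original post) using org.apache.hadoop.io.IOUtils; the class name HDFSUtilIOUtils is hypothetical:

import java.io.ByteArrayOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class HDFSUtilIOUtils {

    // Equivalent to HDFSUtil.readFromFileToByteArray, but delegates the
    // buffered copy to Hadoop's IOUtils
    public static byte[] readFromFileToByteArray(String srcFile) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (FSDataInputStream in = fs.open(new Path(srcFile))) {
            // copyBytes(in, out, bufferSize, closeStreams); the input stream
            // is closed by try-with-resources, so closeStreams is false
            IOUtils.copyBytes(in, bos, 65535, false);
        }
        return bos.toByteArray();
    }
}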

After writing the class, package the project. Once the build succeeds, find the jar file under the project's target directory (the standard Maven output location).



Copy it to the desktop and upload it to the Linux system with the rz command. Then run it with yarn jar TlHadoopCore-jar-with-dependencies.jar com.wh.util.HDFSUtil; executing the class this way reads the file at the path given in the code.

An explanation of the command: yarn jar <name of the jar file> <package containing the class to run>.<class name>.
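
A small optional variant (not in the original post): have main take the HDFS path from the command line, so the same jar can read any file without a rebuild:

// Hypothetical replacement for HDFSUtil.main: the path comes from args, e.g.
// yarn jar TlHadoopCore-jar-with-dependencies.jar com.wh.util.HDFSUtil /some/hdfs/file
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("usage: HDFSUtil <hdfs-path>");
        System.exit(1);
    }
    System.out.print(readFromFile(args[0]));
}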

Reposted from blog.csdn.net/mrs_wuho_o/article/details/79223025