HDFS-文件读取API

package com.zhen.hdfs;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Small demonstrations of reading files from HDFS through the Hadoop
 * {@link FileSystem} API.
 *
 * <p>{@code FileSystem} offers several static factory methods:
 * <ul>
 *   <li>{@code public static FileSystem get(Configuration conf) throws IOException}
 *       - implemented as {@code return get(getDefaultUri(conf), conf);}. Returns the
 *       default file system (the one specified in conf/core-site.xml; if none is
 *       specified, the default local file system is used).</li>
 *   <li>{@code public static FileSystem get(URI uri, Configuration conf) throws IOException}
 *       - picks the file system from the scheme and authority of the given URI; if
 *       the URI has no scheme, the default file system is returned.</li>
 *   <li>{@code public static FileSystem get(final URI uri, final Configuration conf,
 *       final String user) throws IOException, InterruptedException}
 *       - accesses the file system as the given user, which matters for security.</li>
 * </ul>
 *
 * <p>In some cases a local file system instance is wanted; {@code getLocal()} can be
 * used for that.
 *
 * @author FengZhen
 * @date August 12, 2018
 */
public class FileSystemReadAPI {

	/**
	 * Runs the seek demonstration against a fixed HDFS path.
	 *
	 * <p>The commented-out calls are the alternative demonstrations; {@code uri} and
	 * {@code user} are kept so that they can be re-enabled without further edits.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {
		String uri = "hdfs://fz/user/hdfs/MapReduce/data/test.txt";
		String uri1 = "hdfs://fz/user/hdfs/MapReduce/data/test1.txt";
		String user = "hadoop";
		//getByUri(uri);
		//getByUriAndUser(uri, user);
		seekDoubleCat(uri1);
	}
	
	/**
	 * Opens the file identified by {@code uri} and copies its content to standard
	 * output.
	 *
	 * <p>The file system is resolved from the URI with
	 * {@code FileSystem.get(URI, Configuration)}. Failures are reported with
	 * {@code printStackTrace()} and are not propagated to the caller.
	 *
	 * @param uri full URI of the file to print; also used as the path to open
	 */
	public static void getByUri(String uri) {
		Configuration conf = new Configuration();
		InputStream inputStream = null;
		try {
			// The FileSystem itself is not closed here: instances returned by
			// FileSystem.get are normally cached and shared by Hadoop.
			FileSystem fileSystem = FileSystem.get(new URI(uri), conf);
			inputStream = fileSystem.open(new Path(uri));
			// 4096-byte copy buffer; 'false' leaves closing to the finally block.
			IOUtils.copyBytes(inputStream, System.out, 4096, false);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (URISyntaxException e) {
			e.printStackTrace();
		} finally {
			// closeStream accepts null, so this is safe even if open() failed.
			IOUtils.closeStream(inputStream);
		}
	}
	
	/**
	 * Opens the file identified by {@code uri} as the given user and copies its
	 * content to standard output.
	 *
	 * <p>The file system is resolved with
	 * {@code FileSystem.get(URI, Configuration, String)}. Failures are reported with
	 * {@code printStackTrace()} and are not propagated. If the calling thread is
	 * interrupted while the file system is being obtained, the interrupt status is
	 * restored before returning.
	 *
	 * @param uri  full URI of the file to print; also used as the path to open
	 * @param user name of the user to access the file system as
	 */
	public static void getByUriAndUser(String uri, String user) {
		Configuration conf = new Configuration();
		InputStream inputStream = null;
		try {
			// The FileSystem itself is not closed here: instances returned by
			// FileSystem.get are normally cached and shared by Hadoop.
			FileSystem fileSystem = FileSystem.get(new URI(uri), conf, user);
			inputStream = fileSystem.open(new Path(uri));
			// 4096-byte copy buffer; 'false' leaves closing to the finally block.
			IOUtils.copyBytes(inputStream, System.out, 4096, false);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (URISyntaxException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			// Catching InterruptedException clears the interrupt flag; restore it so
			// code further up the stack can still observe the interruption.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} finally {
			// closeStream accepts null, so this is safe even if open() failed.
			IOUtils.closeStream(inputStream);
		}
	}
	
	/**
	 * Prints the file at {@code uri} to standard output, prints a separator line,
	 * then seeks to byte offset 1 and prints the remainder of the file again.
	 *
	 * <p>Because the second pass starts at offset 1, it omits the first byte of the
	 * file; {@code seek(0)} would be needed to reprint the file from the start.
	 *
	 * <p>Background: {@code FileSystem.open} actually returns an
	 * {@link FSDataInputStream} rather than a plain java.io stream. That class extends
	 * {@code java.io.DataInputStream} and supports random access, so data can be read
	 * from any position in the stream:
	 * <pre>
	 * public class FSDataInputStream extends DataInputStream
	 *     implements Seekable, PositionedReadable,
	 *       ByteBufferReadable, HasFileDescriptor, CanSetDropBehind, CanSetReadahead,
	 *       HasEnhancedByteBufferAccess {}
	 * </pre>
	 *
	 * <p>The {@code Seekable} interface supports moving to a given position in the
	 * file and querying the current offset from the start of the file
	 * ({@code getPos()}):
	 * <pre>
	 * public interface Seekable {
	 *   void seek(long pos) throws IOException;
	 *   long getPos() throws IOException;
	 *   boolean seekToNewSource(long targetPos) throws IOException; // InterfaceAudience.Private
	 * }
	 * </pre>
	 *
	 * <p>Calling {@code seek} with a position beyond the end of the file raises an
	 * {@code IOException}. Unlike {@code java.io.InputStream.skip}, which can only
	 * move relative to the current position, {@code seek} can move to any absolute
	 * position in the file. Note that {@code seek} is a relatively expensive
	 * operation and should be used sparingly.
	 *
	 * <p>Failures are reported with {@code printStackTrace()} and are not propagated.
	 *
	 * @param uri full URI of the file to print; also used as the path to open
	 */
	public static void seekDoubleCat(String uri) {
		Configuration conf = new Configuration();
		FSDataInputStream inputStream = null;
		try {
			// The FileSystem itself is not closed here: instances returned by
			// FileSystem.get are normally cached and shared by Hadoop.
			FileSystem fileSystem = FileSystem.get(new URI(uri), conf);
			inputStream = fileSystem.open(new Path(uri));
			// First pass: whole file. 'false' keeps the stream open for the seek below.
			IOUtils.copyBytes(inputStream, System.out, 4096, false);
			System.out.println("-------------------end-----------------");
			// Absolute byte offset into the file; the second pass starts from here.
			inputStream.seek(1);
			IOUtils.copyBytes(inputStream, System.out, 4096, false);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (URISyntaxException e) {
			e.printStackTrace();
		} finally {
			// closeStream accepts null, so this is safe even if open() failed.
			IOUtils.closeStream(inputStream);
		}
	}
}
猜你喜欢