Hadoop下的压缩与解压--Hadoop学习笔记

压缩与解压

这里的压缩和解压是借助Hadoop提供的压缩编解码器(codec)完成的,因此工程中需要引入Hadoop依赖;示例操作的是本地文件。
下面以BZip2和Gzip两种压缩格式为例,直接上代码。

package compress;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.*;

/**
 * Demonstrates compressing and decompressing a local file with Hadoop's
 * compression codecs (for example {@code GzipCodec} or {@code BZip2Codec}).
 *
 * @author Administrator
 */
public class TestCompress {

    /** Buffer size, in bytes, handed to {@code IOUtils.copyBytes}. */
    private static final int BUFFER_SIZE = 1024 * 1024;

    /**
     * Compresses a sample file with Gzip and then decompresses the result.
     *
     * @param args ignored
     * @throws IOException            if reading or writing any of the files fails
     * @throws ClassNotFoundException if the codec class name cannot be resolved
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException {
        // Compress. To try BZip2 instead, pass "org.apache.hadoop.io.compress.BZip2Codec"
        // here; the path given to decompress() below must then use that codec's extension.
        compress("C:/Users/Administrator/Desktop/input/hello.txt", "org.apache.hadoop.io.compress.GzipCodec");

        // Decompress the file produced above.
        decompress("C:/Users/Administrator/Desktop/input/hello.txt.gz");
    }

    /**
     * Compresses {@code fileName} with the given codec, writing the result next to
     * the source file as {@code fileName + codec.getDefaultExtension()}.
     *
     * @param fileName path of the local file to compress
     * @param method   fully qualified class name of a {@link CompressionCodec} implementation
     * @throws IOException            if the source cannot be read or the target cannot be written
     * @throws ClassNotFoundException if {@code method} does not name a loadable class
     * @throws ClassCastException     if {@code method} names a class that is not a {@link CompressionCodec}
     */
    private static void compress(String fileName, String method) throws IOException, ClassNotFoundException {
        // Resolve the codec before opening any file so that a bad class name
        // cannot leave an open file handle behind.
        Class<? extends CompressionCodec> codecClass =
                Class.forName(method).asSubclass(CompressionCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, new Configuration());

        // try-with-resources closes the streams in reverse order even when the copy
        // fails; closing the codec stream first lets it finish the compressed output
        // before the underlying file stream is closed.
        try (InputStream source = new FileInputStream(new File(fileName));
             OutputStream target = new FileOutputStream(new File(fileName + codec.getDefaultExtension()));
             CompressionOutputStream compressed = codec.createOutputStream(target)) {
            // 'false': copyBytes must not close the streams itself; the try block owns them.
            IOUtils.copyBytes(source, compressed, BUFFER_SIZE, false);
        }
    }

    /**
     * Decompresses {@code fileName}, writing the result to {@code fileName + ".decode"}.
     * The codec is looked up from the file name via {@link CompressionCodecFactory};
     * if no codec matches, a message is printed and nothing is written.
     *
     * @param fileName path of the local compressed file
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    private static void decompress(String fileName) throws IOException {
        // Find out which codec (if any) handles this file.
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        CompressionCodec codec = factory.getCodec(new Path(fileName));
        if (codec == null) {
            System.out.println("can't process");
            return;
        }

        // try-with-resources guarantees all three streams are closed even if
        // creating the codec stream or copying the data throws.
        try (InputStream source = new FileInputStream(new File(fileName));
             CompressionInputStream decompressed = codec.createInputStream(source);
             OutputStream target = new FileOutputStream(new File(fileName + ".decode"))) {
            // 'false': copyBytes must not close the streams itself; the try block owns them.
            IOUtils.copyBytes(decompressed, target, BUFFER_SIZE, false);
        }
    }
}

这里在桌面的input目录下随便新建一个txt文件(hello.txt),然后对它进行压缩和解压。
运行后,hello.txt是原文件,hello.txt.gz是压缩后的文件,hello.txt.gz.decode是解压后的文件。把hello.txt.gz.decode重命名为hello.txt(或其他.txt文件名)就可以直接打开阅读了,内容和原文件一样。

猜你喜欢

转载自blog.csdn.net/liuliusix/article/details/109358876