sudo apt-get install liblzo2-dev
2、安装lzop
sudo apt-get install lzop
3、编译hadoop-lzo.jar
用git在此页面下载源码
https://github.com/kevinweil/hadoop-lzo
编译环境
32位os
export CFLAGS=-m32
export CXXFLAGS=-m32
ant compile-native tar
64位os
export CFLAGS=-m64
export CXXFLAGS=-m64
ant compile-native tar
成功编译后,hadoop-lzo*.jar在build文件夹中
cp hadoop-lzo-0.4.13.jar $HADOOP_HOME/lib/
在core-site.xml中添加以下配置:
<property>
  <name>io.compression.codecs</name>
  <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
</property>
<property>
  <name>io.compression.codec.lzo.class</name>
  <value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
给lzo文件创建索引,例如big_file.lzo文件
单机上创建索引:
hadoop jar /path/to/your/hadoop-lzo.jar com.hadoop.compression.lzo.LzoIndexer big_file.lzo
用map-reduce job创建索引:
hadoop jar /path/to/your/hadoop-lzo.jar com.hadoop.compression.lzo.DistributedLzoIndexer big_file.lzo