3.1.7 KeyValueTextInputFormat使用案例
1.需求
统计输入文件中第一个单词相同的行数,即以每行的第一个单词为key,统计以该单词开头的行共有多少行。
(1)输入数据
banzhang ni hao
xihuan hadoop banzhang
banzhang ni hao
xihuan hadoop banzhang
(2)期望结果数据
banzhang 2
xihuan 2
2.需求分析
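在本地的Hadoop 3.1.2上运行输入的数据,得到相应的结果。KeyValueTextInputFormat按分隔符(本例为空格)在第一次出现处将每行切分为key和value,key即每行的第一个单词;Mapper对每个key输出1,Reducer按key累加,得到以该单词开头的行数。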
3.代码实现
(1)编写Mapper类
/**
 * Mapper for the KeyValueTextInputFormat example.
 *
 * <p>Each input record arrives as a ({@code Text} key, {@code Text} value) pair. The mapper
 * ignores the value and emits the incoming key paired with the constant count 1, so that a
 * downstream reducer can total the occurrences of each key.
 *
 * <p>Created 2020/3/6 9:00.
 *
 * @author zhangyong
 * @version 1.0
 */
public class KVTextMapper extends Mapper<Text, Text, Text, LongWritable> {

    // Constant count of 1 written for every record; the same instance is reused on each call.
    LongWritable v = new LongWritable(1L);

    /**
     * Emits {@code (recordKey, 1)} for the given record.
     *
     * @param recordKey   key part of the input record, written out unchanged
     * @param recordValue value part of the input record; not used
     * @param ctx         task context that receives the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(Text recordKey, Text recordValue, Context ctx)
            throws IOException, InterruptedException {
        // Sample input line from the tutorial: "banzhang ni hao"
        ctx.write(recordKey, v);
    }
}
(2)编写Reducer类
/**
 * Reducer for the KeyValueTextInputFormat example.
 *
 * <p>For every key it adds up all {@code LongWritable} values received for that key and
 * writes a single {@code (key, total)} pair.
 *
 * <p>Created 2020/3/6 9:00.
 *
 * @author zhangyong
 * @version 1.0
 */
public class KVTextReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    // Output holder, overwritten with the new total on every reduce call.
    LongWritable v = new LongWritable();

    /**
     * Totals the values of one key and writes the result.
     *
     * @param word   the key being reduced, written out unchanged
     * @param counts all values grouped under {@code word}
     * @param ctx    task context that receives the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text word, Iterable<LongWritable> counts, Context ctx)
            throws IOException, InterruptedException {
        v.set(total(counts));
        ctx.write(word, v);
    }

    /** Returns the sum of all values in {@code counts} (0 when there are none). */
    private static long total(Iterable<LongWritable> counts) {
        long running = 0L;
        for (LongWritable count : counts) {
            running += count.get();
        }
        return running;
    }
}
(3)编写Driver类
/**
 * Driver (entry point) for the KeyValueTextInputFormat example job.
 *
 * <p>Builds a job that runs with the local framework on the local file system, reads its
 * input through {@code KeyValueTextInputFormat} using a single space as the key/value
 * separator, and wires in {@code KVTextMapper} and {@code KVTextReducer}. Both the map
 * output and the final output are {@code Text}/{@code LongWritable} pairs.
 *
 * <p>Created 2020/3/4 9:00.
 *
 * @author zhangyong
 * @version 1.0
 */
public class KVTextDriver {

    /** Input directory used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "src/main/resources/kv/kvi2/";

    /** Output directory used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT = "src/main/resources/kv/kvo2";

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}; when fewer than two arguments
     *             are supplied the built-in demo paths are used
     * @throws IOException            if the file system or the job cannot be accessed
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Honour paths from the command line; fall back to the demo paths otherwise.
        final boolean pathsGiven = args != null && args.length >= 2;
        final Path inputPath = new Path(pathsGiven ? args[0] : DEFAULT_INPUT);
        final Path outputPath = new Path(pathsGiven ? args[1] : DEFAULT_OUTPUT);

        // Run with the local job runner against the local file system.
        Configuration cfg = new Configuration();
        cfg.set("mapreduce.framework.name", "local");
        cfg.set("fs.defaultFS", "file:///");
        // Separator between key and value on each input line.
        cfg.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR, " ");

        // Remove a stale output directory so the job can be re-run.
        // The guard must test the OUTPUT path (the original tested the input path).
        final FileSystem filesystem = FileSystem.get(cfg);
        if (filesystem.exists(outputPath)) {
            filesystem.delete(outputPath, true);
        }

        // 1 Obtain the job object
        Job job = Job.getInstance(cfg);

        // 2 Set the jar location and associate the mapper and reducer
        job.setJarByClass(KVTextDriver.class);
        job.setMapperClass(KVTextMapper.class);
        job.setReducerClass(KVTextReducer.class);

        // 3 Map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // 4 Final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // 5 Input path and input format
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(KeyValueTextInputFormat.class);

        // 6 Output path
        FileOutputFormat.setOutputPath(job, outputPath);

        // 7 Submit the job and report its outcome through the process exit status
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
(4)项目结构及运行结果