MapReduce - A - Reduction - Combiner

Problem:
Perform a word count using a Combiner.

Approach:
The Combiner aggregates each map task's output locally before it is shuffled to the reducers; it can be understood as an early, map-side reduce that cuts down the amount of data sent over the network.
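
A rough illustration of the record flow, using hypothetical input words:

map output:       (hello,1) (hello,1) (world,1)
after Combiner:   (hello,2) (world,1)                 <- merged locally, per map task
reduce input:     hello -> [2, ...]   world -> [1, ...]   (partial counts from all map tasks)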

Code:

//MyCombiner

package A_Combiner02;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class MyCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
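    // Note: a Combiner reuses the Reducer API, and its input and output key/value types
    // must both match the map output types (Text / LongWritable), because the framework
    // may apply it zero, one, or more times to map-side output.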
    public LongWritable v = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // sum the 1s emitted for this word within the current map task's output
        long cnt = 0;
        for (LongWritable lw : values) {
            cnt += lw.get();
        }
        v.set(cnt);
        context.write(key, v);
    }
}
//MainDemo (driver, Mapper, and Reducer)

package A_Combiner02;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MainDemo {
    public static void main(String[] args){
        try{
            Configuration conf = new Configuration();

            conf.set("fs.defaultFS","hdfs://hadoop105:9000");

            Job job = Job.getInstance(conf, "A_Combiner02-MainDemo");

            job.setJarByClass(MainDemo.class);

            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);

            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);

            // set the Combiner class on the job; it runs on each map task's output before the shuffle
            job.setCombinerClass(MyCombiner.class);

            FileInputFormat.addInputPath(job, new Path("/test/input/t3.txt"));
            FileOutputFormat.setOutputPath(job, new Path("/test/output/08"));
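            // Note: FileOutputFormat requires that the output directory not exist yet;
            // remove /test/output/08 (e.g. hdfs dfs -rm -r /test/output/08) before re-running.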

            // exit with 0 on success, 1 on failure
            int exitCode = job.waitForCompletion(true) ? 0 : 1;

            System.exit(exitCode);
        }
        catch (Exception e){
            e.printStackTrace();
        }
    }
    // custom Mapper: splits each line on spaces and emits (word, 1) for every token
    public static class MyMapper extends Mapper<Object, Text, Text, LongWritable>{
        Text k = new Text();
        LongWritable v = new LongWritable(1L);
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String row = value.toString();
            String[] words = row.split(" ");
            for(String st: words){
                k.set(st);
                context.write(k, v);
            }
        }
    }
    // custom Reducer: sums the partial counts produced by each map task's Combiner
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable>{
        private LongWritable v = new LongWritable();
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            // the Combiner only aggregates within a single map task, so the reducer must
            // still sum the partial counts it receives for each word to get the final total
            long sum = 0;
            for (LongWritable lw : values) {
                sum += lw.get();
            }
            v.set(sum);
            context.write(key, v);
        }
    }
}
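
With the default TextOutputFormat, each line of the result files under /test/output/08 holds a word and its total count, separated by a tab.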
Reposted from blog.csdn.net/BlessingXRY/article/details/100608621