(10) Multiple Mapper Input Sources and Multiple Reduce Output Files


Purpose

On the Mapper side: take multiple input sources and process them into a single output.

On the Reduce side: produce multiple outputs, deciding for each emitted record, according to your own rules, which file it is written to.

Example

The following two files serve as the input sources (handled by the Mappers).

The goal is to write each student's scores to a different output file (handled by the Reducer).

test1.txt (three lines per student, read with the custom AuthInputFomat):

tom
math 90
english 98
jary
math 78
english 87
rose
math 87
english 90

test2.txt (one line per student, read with the standard TextInputFormat):

tom math 67 english 87
jary math 59 english 80
rose math 79 english 60

Code Implementation

  • Write the custom input and output format components (the AuthInputFomat and AuthOutputFormat classes referenced below; their code is not shown in this post, but a rough sketch of the output side is given after the Driver class)
  • Write two Mappers, one for each input file (a sketch of both follows the Reducer code)
  • Reducer implementation
package hadoop04;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Routes each student's records to a separate named output via MultipleOutputs.
public class ReduceDemo extends Reducer<Text, Text, Text, Text>{
	
	// Handle for writing to the named outputs registered in the Driver.
	private MultipleOutputs<Text, Text> mos;
	
	@Override
	protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
		mos = new MultipleOutputs<>(context);
	}
	
	@Override
	protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, Text>.Context context)
			throws IOException, InterruptedException {
		
		// Send every score record to the named output matching the student's name.
		for(Text txt : value) {
			if(key.toString().equals("jary")){
				mos.write("jary", key, txt);
			}
			if(key.toString().equals("rose")){
				mos.write("rose", key, txt);
			}
			if(key.toString().equals("tom")){
				mos.write("tom", key, txt);
			}
		}
	}
	
	@Override
	protected void cleanup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
		// Close MultipleOutputs, otherwise the named output files may not be flushed.
		mos.close();
	}

}
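  • Mapper implementations (sketch)

MapperDemo01 and MapperDemo02, which the Driver below registers with MultipleInputs, are not shown in the original post. The following minimal sketch rests on two assumptions: the custom AuthInputFomat already turns each student's three lines in test1.txt into one (name, scores) record of type (Text, Text), so MapperDemo01 only forwards what it receives; MapperDemo02 gets test2.txt line by line from TextInputFormat and splits each line at the first space.

// MapperDemo01.java (sketch)
package hadoop04;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MapperDemo01 extends Mapper<Text, Text, Text, Text> {

	@Override
	protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
		// AuthInputFomat is assumed to have parsed the record already, so just forward it.
		context.write(key, value);
	}
}

// MapperDemo02.java (sketch)
package hadoop04;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MapperDemo02 extends Mapper<LongWritable, Text, Text, Text> {

	@Override
	protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
		// Lines look like "tom math 67 english 87": the name is the first token, the rest are scores.
		String line = value.toString().trim();
		int firstSpace = line.indexOf(' ');
		if (firstSpace < 0) {
			return; // skip empty or malformed lines
		}
		context.write(new Text(line.substring(0, firstSpace)), new Text(line.substring(firstSpace + 1)));
	}
}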
  • Driver class implementation
package hadoop04;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

import hadoop03.AuthInputFomat;   // assuming AuthInputFomat lives in the same package as AuthOutputFormat
import hadoop03.AuthOutputFormat;


public class DriverDemo {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "JobName");
		
		job.setJarByClass(DriverDemo.class);
		
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		
		// Without this, the default identity reducer runs and the named outputs are never written.
		job.setReducerClass(ReduceDemo.class);
		
		// When one Mapper cannot handle every input format, bind a separate Mapper to each input path here.
		// In that case, do not call setMapperClass().
		MultipleInputs.addInputPath(job, new Path("hdfs://192.168.80.100:9000/input/test1.txt"), 
				AuthInputFomat.class, MapperDemo01.class);
		
		MultipleInputs.addInputPath(job, new Path("hdfs://192.168.80.100:9000/input/test2.txt"), 
				TextInputFormat.class, MapperDemo02.class);
		
		MultipleOutputs.addNamedOutput(job, "jary", AuthOutputFormat.class, Text.class, Text.class);
		MultipleOutputs.addNamedOutput(job, "tom", AuthOutputFormat.class, Text.class, Text.class);
		MultipleOutputs.addNamedOutput(job, "rose", AuthOutputFormat.class, Text.class, Text.class);
		
		FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.80.100:9000/result"));
		
		job.waitForCompletion(true);
	}
}
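  • Custom output format (sketch)

The AuthOutputFormat class imported from the hadoop03 package is not shown in this post either. Assuming it simply writes one "key value" line per record, a minimal version could look like the sketch below; the real class from the earlier post in this series may differ. The matching AuthInputFomat would, analogously, extend FileInputFormat with a RecordReader that groups the three lines per student in test1.txt into one record.

// AuthOutputFormat.java (sketch)
package hadoop03;

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AuthOutputFormat extends FileOutputFormat<Text, Text> {

	@Override
	public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
			throws IOException, InterruptedException {
		// Create this task's work file inside the job output directory.
		Path file = getDefaultWorkFile(context, "");
		FileSystem fs = file.getFileSystem(context.getConfiguration());
		final FSDataOutputStream out = fs.create(file, false);

		return new RecordWriter<Text, Text>() {
			@Override
			public void write(Text key, Text value) throws IOException {
				// One "name scores" line per record.
				out.write((key.toString() + " " + value.toString() + "\n").getBytes("UTF-8"));
			}

			@Override
			public void close(TaskAttemptContext ctx) throws IOException {
				out.close();
			}
		};
	}
}

With MultipleOutputs, each named output gets its own files under /result, e.g. jary-r-00000, rose-r-00000 and tom-r-00000; the default part-r-00000 files stay empty because the Reducer only writes through mos.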

 
