MapReduce code example -- reduce-side join

package join;

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * Reduce-side join: records from the two input files are tagged with their
 * source table in the mapper, then joined per key in the reducer.
 *
 * Input files:
 *
 * user
 * 1 zhangshan
 * 2 lisi
 * 3 wangwu
 * 4 zhaoliu
 *
 * log
 * 1 login
 * 2 login
 * 1 login
 * 3 login
 *
 * Expected output (login count per user):
 * zhangshan 2
 * lisi 1
 * wangwu 1
 * zhaoliu 0
 *
 * @author Administrator
 *
 */
public class ReduceJoinApp {

    private static String INPUT_PATH = "hdfs://hadoop:9000/in/reducejoin";
    private static String OUT_PATH = "hdfs://hadoop:9000/out";

    /**
     * @param args
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        // Remove the output directory if it already exists
        fileSystem.delete(new Path(OUT_PATH), true);

        Job job = new Job(conf);
        job.setJarByClass(ReduceJoinApp.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(MyMapper.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
        job.waitForCompletion(true);

        // Print the job output to the console
        final FSDataInputStream in = fileSystem.open(new Path(OUT_PATH + "/part-r-00000"));
        IOUtils.copyBytes(in, System.out, 1024, true);
    }

    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        Text k2 = new Text();
        Text v2 = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            String[] splited = value.toString().split("\t");
            // Tag each record with its source file so the reducer can tell
            // user records from log records
            FileSplit inputSplit = (FileSplit) context.getInputSplit();
            String name = inputSplit.getPath().getName();
            System.out.println(name); // debug: show which file this split comes from
            if (name.contains("user")) {
                v2.set("user_" + splited[1]);
            } else {
                v2.set("log_" + splited[1]);
            }

            k2.set(splited[0]); // join key: the user id
            context.write(k2, v2);
        }
    }

    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        Text k3 = new Text();
        Text v3 = new Text();

        @Override
        protected void reduce(Text k2, Iterable<Text> v2s, Context context)
                throws java.io.IOException, InterruptedException {
            // For each user id, take the name from the "user_" record and
            // count every "log_" record as one login
            int times = 0;
            for (Text v2 : v2s) {
                if (v2.toString().startsWith("user")) {
                    k3.set(v2.toString().split("_")[1]);
                } else {
                    times++;
                }
            }
            v3.set(times + "");
            context.write(k3, v3);
        }
    }

}
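
To see the join logic in isolation from Hadoop, the following is a minimal plain-Java sketch (not from the original post) that feeds the reducer's per-key logic the kind of tagged values it would receive for one user id. The class name, helper method, and sample values are illustrative assumptions, not part of the job above.

import java.util.Arrays;
import java.util.List;

public class ReduceJoinLogicDemo {

    // Mirrors MyReducer for a single key: the one "user_" value carries the
    // name, every "log_" value counts as one login
    static String joinOneKey(List<String> taggedValues) {
        String name = "";
        int times = 0;
        for (String v : taggedValues) {
            if (v.startsWith("user")) {
                name = v.split("_")[1];
            } else {
                times++;
            }
        }
        return name + "\t" + times;
    }

    public static void main(String[] args) {
        // What the reducer would see for key "1" given the sample input
        System.out.println(joinOneKey(Arrays.asList("user_zhangshan", "log_login", "log_login")));
        // prints: zhangshan	2

        // A key that appears only in the user file yields a count of 0
        System.out.println(joinOneKey(Arrays.asList("user_zhaoliu")));
        // prints: zhaoliu	0
    }
}

Because only a name and a counter are kept per key, this reduce-side join does not need to buffer the grouped values; it does, however, assume every user id that appears in the log file also has a matching "user_" record.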


Reposted from jsh0401.iteye.com/blog/2111922