版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
数据:
index1:
I love Beijing and I love China
I love Jinan I love
I love Taian
index2:
Beijing is Beijing is the capital of China
Jinan is the capital city of Shandong
I am am I
index3:
a city in eastern China
the capital of Shandong province
population 2,726,400
I am
Output:
2,726,400 index3:1;
Beijing index2:2;index1:1;
China index1:1;index2:1;index3:1;
I index1:5;index2:2;index3:1;
Jinan index1:1;index2:1;
Shandong index2:1;index3:1;
Taian index1:1;
a index3:1;
am index2:2;index3:1;
and index1:1;
capital index2:2;index3:1;
city index2:1;index3:1;
eastern index3:1;
in index3:1;
is index2:3;
love index1:5;
of index2:2;index3:1;
population index3:1;
province index3:1;
the index2:2;index3:1;
代码:
package MapReducer05;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
 * MapReduce job that builds an inverted index with per-file term counts.
 *
 * <p>Every record flowing between the stages has the same shape:
 * key = word, value = {@code fileName:count}. Because the combiner reads and
 * writes that same shape, the result is correct whether the framework runs
 * the combiner zero, one or several times.
 *
 * <p>Final output, one line per word, postings ordered by {@link MyWritable}:
 * <pre>
 * I     index1:5;index2:2;index3:1;
 * love  index1:5;
 * </pre>
 */
public class WordCountJob {
    /** Separates the file name from the count inside a posting, e.g. {@code index1:3}. */
    static final char POSTING_SEPARATOR = ':';

    /** Input glob used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT = "C:\\Users\\Chen\\Desktop\\input\\dpsy\\*";

    /** Output directory used when no second command-line argument is supplied. */
    private static final String DEFAULT_OUTPUT = "C:\\Users\\Chen\\Desktop\\12";

    /**
     * Sums the counts of all postings that belong to the same file.
     *
     * @param values postings in {@code fileName:count} form
     * @return map from file name to the total count over {@code values}
     * @throws IOException if a posting has no separator, an empty file name,
     *                     or a count that is not an integer
     */
    static Map<String, Integer> sumCountsByFile(Iterable<Text> values) throws IOException {
        Map<String, Integer> totals = new HashMap<String, Integer>();
        for (Text value : values) {
            String posting = value.toString();
            // Split on the LAST separator so a file name containing ':' still parses.
            int sep = posting.lastIndexOf(POSTING_SEPARATOR);
            if (sep <= 0 || sep == posting.length() - 1) {
                throw new IOException("Malformed posting (expected fileName:count): " + posting);
            }
            String fname = posting.substring(0, sep);
            int count;
            try {
                count = Integer.parseInt(posting.substring(sep + 1));
            } catch (NumberFormatException e) {
                throw new IOException("Malformed count in posting: " + posting, e);
            }
            Integer previous = totals.get(fname);
            totals.put(fname, previous == null ? count : previous + count);
        }
        return totals;
    }

    /**
     * Emits {@code (word, "fileName:1")} for every whitespace-separated token
     * of the input line. The file name is taken from the current input split.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text word = new Text();
        private final Text posting = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            posting.set(split.getPath().getName() + POSTING_SEPARATOR + "1");
            for (String token : value.toString().split("\\s+")) {
                // Blank lines and leading whitespace produce an empty token; it is not a word.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, posting);
            }
        }
    }

    /**
     * Merges all postings of one word into a single output line.
     *
     * <p>Counts are summed per file first, so postings that arrive from
     * different map tasks (or un-combined {@code fileName:1} records) for the
     * same file collapse into one entry before sorting.
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            Set<MyWritable> postings = new TreeSet<MyWritable>();
            for (Map.Entry<String, Integer> entry : sumCountsByFile(values).entrySet()) {
                postings.add(new MyWritable(entry.getKey(), entry.getValue()));
            }
            StringBuilder line = new StringBuilder();
            for (MyWritable p : postings) {
                line.append(p);
            }
            context.write(key, new Text(line.toString()));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} input path/glob, {@code args[1]}
     *             output directory; the built-in defaults are used when absent
     */
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
        //System.setProperty("hadoop.home.dir", "F:\\hadoop-2.6.4");
        String input = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String output = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        BasicConfigurator.configure();
        Job job = Job.getInstance(conf, "mr");
        job.setJarByClass(WordCountJob.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setCombinerClass(MyCombiner.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

/**
 * Map-side pre-aggregation: for one word, collapses postings of the same file
 * into a single {@code fileName:total} posting.
 *
 * <p>Example input for key {@code I}: {@code index1:1, index1:1, index2:1};
 * output: {@code index1:2, index2:1}.
 *
 * <p>The key is passed through unchanged and the output value format equals
 * the input value format, so applying this class repeatedly, or not at all,
 * does not change the final result.
 */
class MyCombiner extends Reducer<Text, Text, Text, Text> {
    private final Text posting = new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, Integer> entry : WordCountJob.sumCountsByFile(values).entrySet()) {
            posting.set(entry.getKey() + WordCountJob.POSTING_SEPARATOR + entry.getValue());
            context.write(key, posting);
        }
    }
}
class MyWritable implements WritableComparable<MyWritable> {
private String fname;
private int count;
public MyWritable(){}
public MyWritable(String fname, int count) {
this.fname = fname;
this.count = count;
}
public int compareTo(MyWritable o) {
int ff = o.count-this.count;
if(ff==0){
return this.fname.compareTo(o.fname);
} else{
return ff;
}
}
@Override
public String toString() {
return this.fname+":"+this.count+";";
}
public void write(DataOutput out) throws IOException {
out.writeUTF(fname);
out.writeInt(count);
}
public void readFields(DataInput in) throws IOException {
this.fname = in.readUTF();
this.count = in.readInt();
}
public String getFname() {
return fname;
}
public void setFname(String fname) {
this.fname = fname;
}
public int getCount() {
return count;
}
public void setCount(int count) {
this.count = count;
}
}