数据样式(每行 7 列,以 \t 分隔,例如:2018-01-01  001616528  236701  强力VC银翘片  6.0  82.8  69.0):
package com.hnxy.mr.max;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.hnxy.mr.max.MaxWrod.MaxMapper;
import com.hnxy.mr.max.MaxWrod.MaxReducer;
public class MaxWrod5 extends Configured implements Tool {
// 定义分隔符
private static final String SPLIT_STP = "\001";
public static class MaxMapper extends Mapper<LongWritable, Text, Text, Text> {
private Text outkey = new Text();
private Text outval = new Text();
private Double maxval = 0D;
private String maxkey = "";
private Double minval = 0D;
private String minkey = "";
String[] star = null;
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
// 数据 T2018-01-01 001616528 236701 强力VC银翘片 6.0 82.8 69.0
// 按照\t分割
star = value.toString().split("\t");
// 每行正确的数据数组的长度为7 并且部位null
if (star.length == 7 && null != star) {
// maxkey 如果小于数组第六为也就是实收金额 那么就maxval就等于这个数
if (maxval < Double.parseDouble(star[6])) {
maxval = Double.parseDouble(star[6]);
// 这时候maxkey就等于数组的第三位也就是药品名称
maxkey = star[3];
}
// 让最小值先等于第一个值 这样可以正常比小
if (minval <= 0) {
minval = Double.parseDouble(star[6]);
minkey = star[3];
}
// 比小
if (minval > Double.parseDouble(star[6])) {
minval = Double.parseDouble(star[6]);
minkey = star[3];
}
}
}
@Override
protected void cleanup(Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
// 这里为什么要用cleanup,因为cleanup只执行一次,而且是最后一次执行。因为要释放资源
// 输出最后最大值
outkey.set("max");
outval.set(maxkey + SPLIT_STP + maxval);
context.write(outkey, outval);
// 局部的最小是记录下来
outkey.set("min");
outval.set(minkey + SPLIT_STP + minval);
context.write(outkey, outval);
}
}
public static class MaxReducer extends Reducer<Text, Text, Text, DoubleWritable> {
private Text outkey = new Text();
private DoubleWritable outval = new DoubleWritable();
private Double maxval = 0D;
private String maxkey = "";
private String minkey = "";
private Double minval = 0D;
private String[] strs = null;
@Override
protected void reduce(Text key, Iterable<Text> values,
Reducer<Text, Text, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {
// 迭代器遍历所有的实收金额
if (key.toString().equals("max")) {
for (Text t : values) {
System.out.println(t);
strs = t.toString().split(SPLIT_STP);
// 最后进行判断 将多个map的最大值进行比较
if (maxval < Double.parseDouble(strs[1])) {
maxkey = strs[0];
maxval = Double.parseDouble(strs[1]);
}
}
outkey.set(maxkey);
outval.set(maxval);
context.write(outkey, outval);
} else if (key.toString().equals("min")) {
// 输出最小
for (Text t : values) {
strs = t.toString().split(SPLIT_STP);
// 对比
if (minval <= 0) {
minval = Double.parseDouble(strs[1]);
minkey = strs[0];
}
}
outkey.set(minkey);
outval.set(minval);
// 直接输出即可
context.write(outkey, outval);
}
}
}
@Override
public int run(String[] args) throws Exception {
// 设置Configretion
Configuration conf = this.getConf();
// 设置job
Job job = Job.getInstance();
job.setJarByClass(MaxWrod5.class);
// map reduce类
job.setMapperClass(MaxMapper.class);
job.setReducerClass(MaxReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
// 设置文件路径
Path in = new Path(args[0]);
Path out = new Path(args[1]);
// 设置hdfs操作对象
FileSystem fs = FileSystem.get(conf);
// 绑定文件输出输入目录
FileInputFormat.addInputPath(job, in);
FileOutputFormat.setOutputPath(job, out);
// 自动删除
if (fs.exists(out)) {
fs.delete(out, true);
// 提示
System.out.println(job.getJobName() + "'s Path Output is deleted");
}
// 执行
boolean con = job.waitForCompletion(true);
if (con) {
System.out.println("ok");
} else {
System.out.println("file");
}
// FileInputFormat.addInputPath(job, path);
return 0;
}
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
System.exit(ToolRunner.run(new MaxWrod5(), args));
}
}
总结:求最大值与求最小值的业务逻辑本质上相同。同时求最大、最小值时需要注意 reduce 阶段的判断:map 阶段输出的 outkey("max" 或 "min")到了 reduce 阶段就是参数 key,二者容易混淆。当 key.toString().equals("max") 时走求最大值的逻辑;当 key 为 "min" 时走求最小值的逻辑。