MapReduce同时求最大值最小值

数据样式:

package com.hnxy.mr.max;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.hnxy.mr.max.MaxWrod.MaxMapper;
import com.hnxy.mr.max.MaxWrod.MaxReducer;

public class MaxWrod5 extends Configured implements Tool {
	// 定义分隔符
	private static final String SPLIT_STP = "\001";

	public static class MaxMapper extends Mapper<LongWritable, Text, Text, Text> {
		private Text outkey = new Text();
		private Text outval = new Text();
		private Double maxval = 0D;
		private String maxkey = "";
		private Double minval = 0D;
		private String minkey = "";
		String[] star = null;

		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// 数据 T2018-01-01 001616528 236701 强力VC银翘片 6.0 82.8 69.0
			// 按照\t分割
			star = value.toString().split("\t");
			// 每行正确的数据数组的长度为7 并且部位null
			if (star.length == 7 && null != star) {
				// maxkey 如果小于数组第六为也就是实收金额 那么就maxval就等于这个数
				if (maxval < Double.parseDouble(star[6])) {
					maxval = Double.parseDouble(star[6]);
					// 这时候maxkey就等于数组的第三位也就是药品名称
					maxkey = star[3];
				}
				// 让最小值先等于第一个值 这样可以正常比小
				if (minval <= 0) {
					minval = Double.parseDouble(star[6]);
					minkey = star[3];
				}
				// 比小
				if (minval > Double.parseDouble(star[6])) {
					minval = Double.parseDouble(star[6]);
					minkey = star[3];
				}
			}

		}

		@Override
		protected void cleanup(Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// 这里为什么要用cleanup,因为cleanup只执行一次,而且是最后一次执行。因为要释放资源
			// 输出最后最大值
			outkey.set("max");
			outval.set(maxkey + SPLIT_STP + maxval);
			context.write(outkey, outval);
			// 局部的最小是记录下来
			outkey.set("min");
			outval.set(minkey + SPLIT_STP + minval);
			context.write(outkey, outval);
		}
	}

	public static class MaxReducer extends Reducer<Text, Text, Text, DoubleWritable> {
		private Text outkey = new Text();
		private DoubleWritable outval = new DoubleWritable();
		private Double maxval = 0D;
		private String maxkey = "";
		private String minkey = "";
		private Double minval = 0D;
		private String[] strs = null;

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Reducer<Text, Text, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {
			// 迭代器遍历所有的实收金额
			if (key.toString().equals("max")) {
				for (Text t : values) {
					System.out.println(t);
					strs = t.toString().split(SPLIT_STP);
					// 最后进行判断 将多个map的最大值进行比较
					if (maxval < Double.parseDouble(strs[1])) {
						maxkey = strs[0];
						maxval = Double.parseDouble(strs[1]);
					}
				}
				outkey.set(maxkey);
				outval.set(maxval);
				context.write(outkey, outval);
			} else if (key.toString().equals("min")) {
				// 输出最小
				for (Text t : values) {
					strs = t.toString().split(SPLIT_STP);
					// 对比
					if (minval <= 0) {
						minval = Double.parseDouble(strs[1]);
						minkey = strs[0];
					}
				}
				outkey.set(minkey);
				outval.set(minval);
				// 直接输出即可
				context.write(outkey, outval);
			}
		}

	}

	@Override
	public int run(String[] args) throws Exception {
		// 设置Configretion
		Configuration conf = this.getConf();
		// 设置job
		Job job = Job.getInstance();
		job.setJarByClass(MaxWrod5.class);
		// map reduce类
		job.setMapperClass(MaxMapper.class);
		job.setReducerClass(MaxReducer.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(DoubleWritable.class);
		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		// 设置文件路径
		Path in = new Path(args[0]);
		Path out = new Path(args[1]);
		// 设置hdfs操作对象
		FileSystem fs = FileSystem.get(conf);
		// 绑定文件输出输入目录
		FileInputFormat.addInputPath(job, in);
		FileOutputFormat.setOutputPath(job, out);
		// 自动删除
		if (fs.exists(out)) {
			fs.delete(out, true);
			// 提示
			System.out.println(job.getJobName() + "'s Path Output is deleted");
		}
		// 执行
		boolean con = job.waitForCompletion(true);
		if (con) {
			System.out.println("ok");
		} else {
			System.out.println("file");
		}
		// FileInputFormat.addInputPath(job, path);
		return 0;
	}

	public static void main(String[] args) throws Exception {
		// TODO Auto-generated method stub
		System.exit(ToolRunner.run(new MaxWrod5(), args));
	}
}

总结:求最小值和求最大值的业务逻辑没有本质区别。

同时求最大值和最小值时,需要注意 reduce 阶段的 if 判断:`key.toString().equals("max")` 中的 key 就是 map 阶段输出的 outkey,到了 reduce 端变成了 key,二者容易混淆。若 key 等于 "max",则执行求最大值的逻辑;若等于 "min",则执行求最小值的逻辑。

发布了95 篇原创文章 · 获赞 0 · 访问量 1072

猜你喜欢

转载自blog.csdn.net/weixin_43006131/article/details/103093726