hadoop 入门级 实战

hadoop 实战 

开头:本文介绍 Hadoop MapReduce 开发的一般方法,默认你已经正确安装了 Hadoop 并搭建好了开发环境。


一、简单的代码框架
加载配置文件
/**加载配置的类 采用单例模式*/

<span style="font-size:14px;">import org.apache.hadoop.conf.Configuration;
public class HadoopConfig {


	private static Configuration config;
	private HadoopConfig(){
		
	}
	public static Configuration getConfig(){
		if(config==null){
			config = new Configuration();
			config.addResource(HadoopConfig.class.getResource("core-site.xml"));
			config.addResource(HadoopConfig.class.getResource("hdfs-site.xml"));
			config.addResource(HadoopConfig.class.getResource("yarn-site.xml"));
		}
		return config;
	}
}</span>



<span style="font-size:14px;">import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
/** 实现mapreduce的类 */
public class <span style="color:#999999;">X</span>{


	/** mapper类  为了方便就放在这儿 也可以放在别的地方 
	在map这个阶段数据以key和value的格式传入下个阶段
	hadoop将自动把相同的key对应的所有value加入一个列表传到reduce。所以reduce的参数是  key values(注意不是value是values)
	*/
	public static class <span style="color:#c0c0c0;">X</span>Mapper extends Mapper<<span style="color:#c0c0c0;">KEYIN</span>, <span style="color:#c0c0c0;">VALUEIN</span>, <span style="color:#c0c0c0;">KEYOUT</span>, <span style="color:#c0c0c0;">VALUEOUT</span>>{
		@Override
		protected void map(<span style="color:#c0c0c0;">KEYIN</span> key, <span style="color:#c0c0c0;">VALUEIN</span> value, Context context)throws IOException, InterruptedException{
			
		}
	}
	/** reducer 刚开始我以为是hadoop集群上的一个机器只能调用一次然后运行完毕。结果不是这样的,Mapper和Reducer 只是一个接口而已 内部的运行和正常的java程序并不一样 不要关心如何分布式计算的*/
	public static class <span style="color:#c0c0c0;">X</span>Reducer extends Reducer<<span style="color:#c0c0c0;">KEYIN</span>, <span style="color:#c0c0c0;">VALUEIN</span>, <span style="color:#c0c0c0;">KEYOUT</span>, <span style="color:#c0c0c0;">VALUEOUT</span>>{
		@Override
		protected void reduce(<span style="color:#c0c0c0;">KEYIN</span> key, Iterable<<span style="color:#c0c0c0;">VALUEIN</span>> values,
				Context context)throws IOException, InterruptedException{
					
		}
	}
	/** 初始化并执行 */
	public static void main(String[] args){
		//加载配置
		Configuration config = HadoopConfig.getConfig();
		Job job = Job.getInstance(config);
		
		job.setJarByClass(<span style="color:#c0c0c0;">X</span>.class);
		//设置mapper类
		job.setMapperClass(<span style="color:#c0c0c0;">X</span>Mapper.class); 
		//设置mapper类Key的输出类
		job.setMapOutputKeyClass(<span style="color:#c0c0c0;">xxx</span>.class); 
		//设置mapper类Value的输出类
		job.setMapOutputValueClass(<span style="color:#c0c0c0;">xxx</span>.class); 
		
		//设置reducer类
		job.setReducerClass(<span style="color:#c0c0c0;">X</span>Reducer.class);
		//设置reducer类Key的输出类		
		job.setOutputKeyClass(<span style="color:#c0c0c0;">xxx</span>.class); 
		//设置reducer类Value的输出类
		job.setOutputValueClass(<span style="color:#c0c0c0;">xxx</span>.class); 
		
		//设置输入路径
		FileInputFormat.addInputPath(job, new Path("/<span style="color:#c0c0c0;">input</span>/"));  
		//设置输出路径
		FileOutputFormat.setOutputPath(job, new Path("/<span style="color:#c0c0c0;">output</span>/")); 
		System.exit(job.waitForCompletion(true)?0:1);
	} 
}</span>

二、示例
统计天气数据的温度最值
数据格式
年份 月份 日期 小时 最高温 最低温 (后面的不重要了)
2013 09 13 22    28   -10  9986   230    60 -9999 -9999 -9999
2013 09 13 23    27   -14  9992   230    60 -9999 -9999 -9999
2013 09 14 00    26   -14  9997   240    30     3 -9999     0
2013 09 14 01    22   -15 10000   250    20 -9999 -9999 -9999


思路:map 阶段以年份为 key、温度为 value(不管是最高温还是最低温),传到 reduce 阶段;
在reduce阶段遍历values找到最值
ps:Hadoop 处理文本输入时,map 阶段默认传入的 key 是行偏移量,value 是一行的文本。



public static class WeatherMapper extends Mapper<LongWritable, Text, Text, DoubleWritable> {

		// Sentinel the data set uses for "no reading".
		private static final String MISSING = "-9999";

		// Reused output writables: map() runs once per input record, so
		// allocating fresh objects every call only creates GC pressure.
		private final Text outYear = new Text();
		private final DoubleWritable outTemp = new DoubleWritable();

		/**
		 * Emits (year, temperature) for both the max-temp and min-temp
		 * columns of one fixed-width weather record. Fixed offsets:
		 * chars 0-3 = year, 13-18 = max temp, 19-24 = min temp.
		 * Missing (-9999) readings and records too short to parse are
		 * skipped instead of crashing the task.
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			String line = value.toString();
			if (line.length() < 25) {
				return; // blank or truncated record - ignore it
			}
			outYear.set(line.substring(0, 4));

			String max = line.substring(13, 19).trim();
			if (!MISSING.equals(max)) {
				outTemp.set(Double.parseDouble(max));
				context.write(outYear, outTemp); // candidate maximum
			}
			String min = line.substring(19, 25).trim();
			if (!MISSING.equals(min)) {
				outTemp.set(Double.parseDouble(min));
				context.write(outYear, outTemp); // candidate minimum
			}
		}
}




public static class WeatherReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

		/**
		 * Scans every temperature collected for one key (a year) and emits
		 * the extremes as ("max", value) and ("min", value).
		 *
		 * <p>NOTE(review): the year key is not part of the output rows, so
		 * with multiple years the "max"/"min" lines are indistinguishable -
		 * consider including the key in the output label.
		 */
		@Override
		protected void reduce(Text key, Iterable<DoubleWritable> values,
				Context context)
				throws IOException, InterruptedException {
			// BUG FIX: Double.MIN_VALUE is the smallest POSITIVE double
			// (~4.9e-324), not the most negative one, so the original seed
			// reported a wrong "max" whenever all temperatures were negative.
			// +/- infinity are the correct identity elements for max/min.
			double max = Double.NEGATIVE_INFINITY;
			double min = Double.POSITIVE_INFINITY;
			for (DoubleWritable value : values) {
				double t = value.get();
				if (t > max) {
					max = t;
				}
				if (t < min) {
					min = t;
				}
			}
			context.write(new Text("max"), new DoubleWritable(max));
			context.write(new Text("min"), new DoubleWritable(min));
		}

	}


猜你喜欢

转载自blog.csdn.net/zzzqqq111222/article/details/45746757
今日推荐