实现map类有两种方法
一 :继承MapReduceBase 实现Mapper接口
package cn.weida.hadoop.read;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Mapper.Context;
/**
* 四个泛型参数 依次为输入建 输入值 输出键 输出值
* 查找最高气温的Mapper
* @author lone
*
*/
/**
 * Old-API (org.apache.hadoop.mapred) Mapper that extracts the year and air
 * temperature from a fixed-width NCDC weather record.
 *
 * The four generic parameters are, in order: input key (byte offset of the
 * line), input value (one line of text), output key (year), output value
 * (air temperature).
 */
public class MaxTemperatureMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {

    /** Sentinel the NCDC format uses for a missing temperature reading. */
    private static final int MISSING = 9999;

    /**
     * Parses one record: columns 15-19 hold the year, 87-92 the signed
     * temperature, and column 92 the quality code. Emits (year, temperature)
     * unless the reading is missing or fails the quality check.
     */
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        final String record = value.toString();
        final String year = record.substring(15, 19);

        // Integer.parseInt rejects a leading plus sign, so skip it when present.
        final int airTemperature = (record.charAt(87) == '+')
                ? Integer.parseInt(record.substring(88, 92))
                : Integer.parseInt(record.substring(87, 92));

        final String quality = record.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            output.collect(new Text(year), new IntWritable(airTemperature)); // emit (year, temperature)
        }
    }

    @Override
    public void configure(JobConf job) {
        // No per-job configuration is needed.
    }

    @Override
    public void close() throws IOException {
        // No resources to release.
    }
}
二:继承Mapper类
package cn.weida.hadoop.read;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Mapper;
public class MaxTemperatureMapper01 extends Mapper<IntWritable, Text, Text, IntWritable>{
private static final int MISSING = 9999;
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter arg3)
throws IOException {
// TODO Auto-generated method stub
String line = value.toString();
String year =line.substring(15,19);
int airTemperature;
if (line.charAt(87)== '+' ) { //parseInt doesnot like leading plus signs
airTemperature= Integer.parseInt(line.substring(88, 92));
} else {
airTemperature = Integer.parseInt(line.substring(87, 92));
}
String quality = line.substring(92,93);
if (airTemperature!=MISSING&&quality.matches("[01459]")) {
output.collect(new Text(year) ,new IntWritable(airTemperature)); //输出:年 温度
}
}
}
Reduce类的实现同样有两种方法
一:继承Reducer类
package cn.weida.hadoop.read;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Reducer;
public class MaxTemperatureReduce01 extends Reducer<IntWritable, Text, Text, IntWritable>{
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, IntWritable> output,
Reporter reporter) throws IOException {
// TODO Auto-generated method stub
int maxValue = Integer.MIN_VALUE;
while(values.hasNext()) {
maxValue = Math.max(maxValue, Integer.parseInt(values.next().toString()));
}
output.collect(key, new IntWritable(maxValue));
}
}
二 :继承MapReduceBase 实现Reducer接口
package cn.weida.hadoop.read;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
/**
* 四个参数 reduce函数的输入类型必须匹配map函数的输出类型
* @author lone
*
*/
/**
 * Old-API (org.apache.hadoop.mapred) Reducer that selects the maximum
 * temperature per year.
 *
 * The four generic parameters are: input key, input value, output key,
 * output value. The reducer's input types must match the mapper's OUTPUT
 * types — the paired mapper emits (Text year, IntWritable temperature), so
 * the original declaration {@code Reducer<Text, Text, ...>} was wrong and is
 * corrected to {@code Reducer<Text, IntWritable, ...>} here.
 */
public class MaxTemperatureReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Emits (year, max temperature) for the group of values sharing one year.
     */
    @Override
    public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        int maxValue = Integer.MIN_VALUE;
        while (values.hasNext()) {
            // Values are already IntWritable; read them directly instead of
            // round-tripping through String/parseInt.
            maxValue = Math.max(maxValue, values.next().get());
        }
        output.collect(key, new IntWritable(maxValue));
    }
}
调用map和reduce
package cn.weida.hadoop.read;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import cn.weida.hadoop.read.*;
/**
 * Driver that configures and submits the max-temperature MapReduce job.
 */
public class MaxTemperature {

    /**
     * The Job object specifies the job's execution parameters.
     *
     * @param args args[0] = input path, args[1] = output path
     * @throws IOException            on HDFS/job-submission failure
     * @throws InterruptedException   if the wait for completion is interrupted
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length != 2) {
            System.err.println("Usage: MaxTemperature <input path> <output path>");
            System.exit(-1);
        }

        Job job = new Job();
        job.setJarByClass(MaxTemperature.class); // locate the job jar via this class
        job.setJobName("Max temperature");

        FileInputFormat.addInputPath(job, new Path(args[0]));   // input data path
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output data path

        // New-API jobs require mapper/reducer classes that extend the
        // org.apache.hadoop.mapreduce Mapper/Reducer base classes.
        job.setMapperClass(MaxTemperatureMapper01.class);
        job.setReducerClass(MaxTemperatureReduce01.class);

        // Output key/value types must match the reduce function's output.
        // Bug fix: the original called setOutputKeyClass twice and never set
        // the output VALUE class.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}