Big Data Study Notes (2): MapReduce Basics

There are two ways to implement the map class.

Method 1: extend MapReduceBase and implement the Mapper interface (the old org.apache.hadoop.mapred API)

package cn.weida.hadoop.read;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;


/**
 * The four type parameters are, in order: input key, input value, output key, output value.
 * Mapper that finds the highest temperature per year.
 * @author lone
 *
 */
public class MaxTemperatureMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {

    private static final int MISSING = 9999;

    /**
     * Input key: the line's byte offset (a long). Input value: one line of text.
     * Output key: year. Output value: air temperature.
     */
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        String line = value.toString();
        String year = line.substring(15, 19);
        int airTemperature;
        if (line.charAt(87) == '+') {  // parseInt doesn't like leading plus signs
            airTemperature = Integer.parseInt(line.substring(88, 92));
        } else {
            airTemperature = Integer.parseInt(line.substring(87, 92));
        }
        String quality = line.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            output.collect(new Text(year), new IntWritable(airTemperature));  // emit: year, temperature
        }
    }

    @Override
    public void configure(JobConf job) {
        // no per-task configuration needed for this job
    }

    @Override
    public void close() throws IOException {
        // no resources to release
    }
}
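
The hard-coded offsets above assume the fixed-width NCDC weather-record layout used in this example: the year is read via substring(15, 19), the signed temperature via charAt(87) plus substring(88, 92), and the quality code via substring(92, 93). As a sanity check, here is a minimal standalone sketch (the ParseCheck class and the synthetic record are mine, not part of the job) that exercises the same parsing logic:

public class ParseCheck {
    public static void main(String[] args) {
        // Build a synthetic 93-character record in the NCDC fixed-width style.
        StringBuilder record = new StringBuilder();
        for (int i = 0; i < 93; i++) record.append('9');
        record.replace(15, 19, "1950");   // year field
        record.replace(87, 92, "+0022");  // sign + temperature (tenths of a degree)
        record.replace(92, 93, "1");      // quality code ("1" passes the [01459] filter)

        String line = record.toString();
        String year = line.substring(15, 19);
        int airTemperature = (line.charAt(87) == '+')
                ? Integer.parseInt(line.substring(88, 92))
                : Integer.parseInt(line.substring(87, 92));
        String quality = line.substring(92, 93);
        System.out.println(year + " -> " + airTemperature + " (quality " + quality + ")");
        // prints: 1950 -> 22 (quality 1)
    }
}
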
Method 2: extend the Mapper class (the new org.apache.hadoop.mapreduce API)

package cn.weida.hadoop.read;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MaxTemperatureMapper01 extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static final int MISSING = 9999;

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String year = line.substring(15, 19);
        int airTemperature;
        if (line.charAt(87) == '+') {  // parseInt doesn't like leading plus signs
            airTemperature = Integer.parseInt(line.substring(88, 92));
        } else {
            airTemperature = Integer.parseInt(line.substring(87, 92));
        }
        String quality = line.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            context.write(new Text(year), new IntWritable(airTemperature));  // emit: year, temperature
        }
    }
}
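
Note that the new API replaces the old configure(JobConf) and close() lifecycle hooks with setup() and cleanup() methods on Mapper, which could optionally be overridden inside MaxTemperatureMapper01 (a sketch; neither override is needed for this job):

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // runs once per task before the first map() call, like configure(JobConf) in the old API
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // runs once per task after the last map() call, like close() in the old API
    }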

The Reducer can likewise be implemented in two ways.

Method 1: extend the Reducer class (the new org.apache.hadoop.mapreduce API)

package cn.weida.hadoop.read;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MaxTemperatureReduce01 extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // the input types must match the Mapper's output types (Text, IntWritable)
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            maxValue = Math.max(maxValue, value.get());
        }
        context.write(key, new IntWritable(maxValue));
    }
}

Method 2: extend MapReduceBase and implement the Reducer interface (the old org.apache.hadoop.mapred API)

package cn.weida.hadoop.read;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;


/**
 * Four type parameters; the reduce function's input types must match the map function's output types.
 * @author lone
 *
 */
public class MaxTemperatureReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        int maxValue = Integer.MIN_VALUE;
        while (values.hasNext()) {
            maxValue = Math.max(maxValue, values.next().get());
        }
        output.collect(key, new IntWritable(maxValue));
    }

}
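
Because taking a maximum is commutative and associative, the reduce function can also double as a combiner, cutting the data shuffled from map tasks to reduce tasks. In the driver shown below, that would be one extra line (an optional addition, not part of the original job):

        job.setCombinerClass(MaxTemperatureReduce01.class);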

The driver: wiring up map and reduce

package cn.weida.hadoop.read;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxTemperature {

    
    /**
     * The Job object specifies the execution parameters for the whole job.
     * @param args
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length != 2) {
            System.err.println("Usage: MaxTemperature <input path> <output path>");
            System.exit(-1);
        }
        Job job = new Job();
        job.setJarByClass(MaxTemperature.class);  // pass a class from the job's JAR so Hadoop can locate that JAR on the cluster
        job.setJobName("Max temperature");
        
        FileInputFormat.addInputPath(job, new Path(args[0]));    // input data path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // output data path (must not already exist)
        
        job.setMapperClass(MaxTemperatureMapper01.class);   // set the map and reduce classes; the new-API Job only accepts
        job.setReducerClass(MaxTemperatureReduce01.class);  // classes that extend Mapper/Reducer (not the old-API interfaces)
        
        job.setOutputKeyClass(Text.class);           // output key and value types must match the reduce function's output types
        job.setOutputValueClass(IntWritable.class);
        
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
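
To run the job, package the classes into a JAR and submit it with the standard hadoop jar command (the JAR name and the input/output paths below are placeholders, substitute your own):

hadoop jar max-temperature.jar cn.weida.hadoop.read.MaxTemperature input/ncdc output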
 

Reposted from blog.csdn.net/acm160920007/article/details/81142106