Implementing MapReduce sorting in Hadoop with the Java API

MapReduce sorting is driven by the map output key, so the class used as the key must implement WritableComparable and provide a compareTo() method.

# The key class (FlowBean)

package org.hadoop.sort;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/*
 * FlowBean is passed between nodes, so it must follow Hadoop's serialization
 * mechanism by implementing the WritableComparable interface.
 */
public class FlowBean implements WritableComparable<FlowBean> {
    // downstream traffic
    private long down_flow;
    // upstream traffic
    private long up_flow;
    // total traffic
    private long total;
    // phone number
    private String phone;

    public FlowBean() {
        // no-arg constructor kept so Hadoop can create FlowBean by reflection; required once other constructors exist
    }


    public FlowBean(String phone, long up_flow, long down_flow) {
        this.phone = phone;
        this.up_flow = up_flow;
        this.down_flow = down_flow;
        this.total = up_flow + down_flow;
    }

    public long getDown_flow() {
        return down_flow;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public long getTotal() {
        return total;
    }

    // MapReduce sorts map output keys with this method; larger down_flow sorts first (descending)
    public int compareTo(FlowBean o) {
        return Long.compare(o.getDown_flow(), this.down_flow);
    }


    // serialize the fields to the output stream
    public void write(DataOutput out) throws IOException {
        // write each field into the byte stream
        out.writeUTF(phone);
        out.writeLong(up_flow);
        out.writeLong(down_flow);
        out.writeLong(total);
    }

    // deserialize: fields must be read back in the same order they were written
    public void readFields(DataInput in) throws IOException {
        // read each field from the byte stream
        phone = in.readUTF();
        up_flow = in.readLong();
        down_flow = in.readLong();
        total = in.readLong();
    }

    @Override
    public String toString() {
        return ""+up_flow+"-"+down_flow+"-"+total;
    }

}
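
The read order in readFields() must mirror the write order in write(). A quick way to sanity-check this, and the compareTo() ordering, outside of any MapReduce job is to round-trip a FlowBean through a byte stream. The small helper class below is an illustrative sketch (the class name and sample values are made up, not part of the original post):

package org.hadoop.sort;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical helper, not part of the job itself.
public class FlowBeanCheck {
    public static void main(String[] args) throws IOException {
        FlowBean a = new FlowBean("13500000000", 100, 300);
        FlowBean b = new FlowBean("13600000000", 100, 200);

        // serialize a the same way Hadoop would
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        a.write(new DataOutputStream(bytes));

        // deserialize into a fresh bean; fields come back in write order
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy);           // 100-300-400
        System.out.println(a.compareTo(b)); // negative: a (larger down_flow) sorts first
    }
}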

# The driver class

package org.hadoop.sort;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class SortRunner{
    public static class SortMapper extends Mapper<LongWritable,Text,FlowBean,NullWritable>
    {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            // assumes each input line is tab-separated: phone, up_flow, down_flow, ...
            String[] fields = StringUtils.split(line, "\t");
            String phone = fields[0];
            long upflow = Long.parseLong(fields[1]);
            long downflow = Long.parseLong(fields[2]);
            context.write(new FlowBean(phone, upflow, downflow), NullWritable.get());
        }
    }
    // the reducer just emits each key; keys arrive already sorted by the framework
    public static class SortReduce extends Reducer<FlowBean,NullWritable,FlowBean,NullWritable>
    {
        @Override
        protected void reduce(FlowBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }



    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop101:9000/");
        // create the job
        Job job = Job.getInstance(conf);
        job.setJarByClass(SortRunner.class);
        // set the mapper
        job.setMapperClass(SortMapper.class);
        // set the reducer
        job.setReducerClass(SortReduce.class);

        // map output types; optional here because they match the final output types below
//        job.setMapOutputKeyClass(FlowBean.class);
//        job.setMapOutputValueClass(NullWritable.class);

        // final (reducer) output types
        job.setOutputKeyClass(FlowBean.class);
        job.setOutputValueClass(NullWritable.class);


        // input path
        FileInputFormat.setInputPaths(job,new Path("hdfs://192.168.117.101:9000/out/test/part-r-00000"));
        // output path
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.117.101:9000/newout"));
        job.waitForCompletion(true);
    }
}
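
For quick experiments without a cluster, the same mapper and reducer can be driven by Hadoop's local job runner against the local filesystem. The driver below is an illustrative sketch under that assumption (it is not from the original post); input and output paths come from the command line instead of being hard-coded:

package org.hadoop.sort;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical local-mode driver for testing; args[0] = input path, args[1] = output path.
public class SortLocalRunner {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("mapreduce.framework.name", "local"); // run map and reduce in-process
        conf.set("fs.defaultFS", "file:///");          // use the local filesystem instead of HDFS

        Job job = Job.getInstance(conf);
        job.setJarByClass(SortLocalRunner.class);
        job.setMapperClass(SortRunner.SortMapper.class);
        job.setReducerClass(SortRunner.SortReduce.class);
        job.setOutputKeyClass(FlowBean.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Each output line is FlowBean.toString(), i.e. up_flow-down_flow-total, with records sorted by down_flow in descending order.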


