Hadoop MapReduce开发--两个输入数据关联优化方案

将人员的地址ID完善成为地址名称。输出格式:人员ID,姓名,地址

测试数据:

address.txt

#地址ID    地址名称
1    北京
2    上海
3    广州

employee.txt

#人员ID    人员名称    地址ID
1    张三    1
2    李四    2
3    王五    1
4    赵六    3
5    马七    3

User实体类

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Writable bean carrying either an address record or an employee record
 * through the shuffle, distinguished by {@link #getFlag()}.
 *
 * <p>flag == 0: address record (cityNo/cityName populated).
 * flag == 1: employee record (userNo/userName/cityNo populated; cityName
 * is filled in by the reducer after the join).
 */
public class User implements WritableComparable<User> {
    private String userNo = "";
    private String userName = "";
    private String cityNo = "";
    private String cityName = "";
    private int flag = 0; // 0: address record / 1: employee record

    /** No-arg constructor required by Hadoop serialization. */
    public User() {
    }

    /** Copy constructor — needed because Hadoop reuses value instances during iteration. */
    public User(User user) {
        this.userNo = user.getUserNo();
        this.userName = user.getUserName();
        this.cityNo = user.getCityNo();
        this.cityName = user.getCityName();
        this.flag = user.getFlag();
    }

    public User(String userNo, String userName, String cityNo, String cityName, int flag) {
        this.userNo = userNo;
        this.userName = userName;
        this.cityNo = cityNo;
        this.cityName = cityName;
        this.flag = flag;
    }

    /** Output line format: userNo, userName, cityName separated by four spaces. */
    @Override
    public String toString() {
        return this.userNo + "    " + this.userName + "    " + this.cityName;
    }

    /**
     * Total order: address records before employee records, then by userNo,
     * then by cityNo. The original implementation returned 0 unconditionally,
     * which violates the Comparable contract and would break any job that
     * uses User as a map output key; no caller in this example depends on
     * the old behavior (User is only used as a value here).
     */
    @Override
    public int compareTo(User o) {
        int cmp = Integer.compare(this.flag, o.flag);
        if (cmp != 0) {
            return cmp;
        }
        cmp = this.userNo.compareTo(o.userNo);
        if (cmp != 0) {
            return cmp;
        }
        return this.cityNo.compareTo(o.cityNo);
    }

    /** Serializes all fields; order must match {@link #readFields}. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.userNo);
        dataOutput.writeUTF(this.userName);
        dataOutput.writeUTF(this.cityNo);
        dataOutput.writeUTF(this.cityName);
        dataOutput.writeInt(this.flag);
    }

    /** Deserializes all fields in the exact order written by {@link #write}. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userNo = dataInput.readUTF();
        this.userName = dataInput.readUTF();
        this.cityNo = dataInput.readUTF();
        this.cityName = dataInput.readUTF();
        this.flag = dataInput.readInt();
    }

    public String getUserNo() {
        return userNo;
    }

    public void setUserNo(String userNo) {
        this.userNo = userNo;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    public String getCityNo() {
        return cityNo;
    }

    public void setCityNo(String cityNo) {
        this.cityNo = cityNo;
    }

    public String getCityName() {
        return cityName;
    }

    public void setCityName(String cityName) {
        this.cityName = cityName;
    }

    public int getFlag() {
        return flag;
    }

    public void setFlag(int flag) {
        this.flag = flag;
    }
}

mapper

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class JoinTwoMapper extends Mapper<LongWritable, Text, Text, User> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        if(!line.startsWith("#")) {
            String[] arr = line.split("\t");
            if(arr.length == 2) {//城市信息
                User user = new User();
                user.setCityNo(arr[0]);
                user.setCityName(arr[1]);
                user.setFlag(0);
                context.write(new Text(arr[0]), user);
            } else {//人员信息
                User user = new User();
                user.setUserNo(arr[0]);
                user.setUserName(arr[1]);
                user.setCityNo(arr[2]);
                user.setFlag(1);
                context.write(new Text(arr[2]), user);
            }
        }
    }
}

reducer

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Joins the single address record (flag == 0) with all employee records
 * (flag == 1) that share the same city id, emitting one output line per
 * employee: userNo, userName, cityName.
 *
 * <p>Fix: the original code dereferenced {@code cityInfo} unconditionally,
 * throwing NullPointerException whenever an employee referenced a city id
 * with no matching address record. Such unmatched groups are now skipped
 * (inner-join semantics, which matches the example's expected output).
 */
public class JoinTwoReducer extends Reducer<Text, User, NullWritable, Text> {
    @Override
    protected void reduce(Text key, Iterable<User> values, Context context) throws IOException, InterruptedException {
        User cityInfo = null;
        List<User> list = new ArrayList<User>();

        // Hadoop reuses the value instance across iterations, so each record
        // must be defensively copied before being held past the loop.
        for (User user : values) {
            if (user.getFlag() == 0) { // address record
                cityInfo = new User(user);
            } else if (user.getFlag() == 1) { // employee record
                list.add(new User(user));
            }
        }

        // No address record for this city id: nothing to join against.
        if (cityInfo == null) {
            return;
        }

        // Emit one joined line per employee.
        for (User user : list) {
            user.setCityName(cityInfo.getCityName());
            context.write(NullWritable.get(), new Text(user.toString()));
        }
    }
}

job

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * 将人员的地址ID完善成为地址名称,输出格式:人员ID,姓名,地址
 */
/**
 * Driver for the reduce-side join job: enriches each employee's city id
 * with the city name. Output line format: userNo, userName, cityName.
 *
 * <p>Usage: two arguments — input directory (containing both address and
 * employee files) and output directory. An existing output directory is
 * deleted first so reruns do not fail with FileAlreadyExistsException.
 */
public class JobMain {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length != 2) {
            // Fix: original message was missing the space after "Join".
            System.err.println("Usage: Join <input path> <output path>");
            System.exit(-1);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Join job2");
        job.setJarByClass(JobMain.class);

        job.setMapperClass(JoinTwoMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(User.class);

        job.setReducerClass(JoinTwoReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Remove a stale output directory so the job can be rerun.
        Path outDirPath = new Path(args[1]);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outDirPath)) {
            fs.delete(outDirPath, true);
        }
        FileOutputFormat.setOutputPath(job, outDirPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

结果:

3    王五    北京
1    张三    北京
2    李四    上海
5    马七    广州
4    赵六    广州

猜你喜欢

转载自blog.csdn.net/fengzhif0001/article/details/86305096