MapReduce 实现对时间的简单排序
首先 MapReduce在处理数据的过程中会对数据排序(map输出的kv对传输到reduce之前会排序),排序的依据是map输出的key。
因此如果要改变排序规则,就要将key位置的值进行修改,具体做法是使用一个实现了 WritableComparable 接口的 bean 对象作为 map 输出的 key,并在其 compareTo 方法中定义排序逻辑
下面为实例代码
MyValueWritable.java
package mywork02;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite MapReduce key holding a (user, timestamp) pair, sorted by
 * timestamp in descending order (newest first), with user as a tie-breaker.
 *
 * <p>The timestamp is expected in the fixed-width format
 * {@code "yyyy-MM-dd HH:mm:ss"}, so plain lexicographic comparison of the
 * strings is chronological — no date parsing is needed. This also makes
 * {@link #compareTo} exception-free and thread-safe, unlike the previous
 * {@code SimpleDateFormat}-based version (SimpleDateFormat is not
 * thread-safe and its ParseException was being swallowed, returning a
 * sentinel value that violated the compareTo contract).
 *
 * @author wuhon
 */
public class MyValueWritable implements WritableComparable<MyValueWritable> {
	private String user;
	// Expected format: "yyyy-MM-dd HH:mm:ss" — TODO confirm all input rows obey it.
	private String timeStamp;

	/**
	 * Populates both fields from a pre-split input line.
	 *
	 * @param split at least two elements: [0] = user, [1] = timestamp
	 */
	public void set(String[] split) {
		this.user = split[0];
		this.timeStamp = split[1];
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		// Must read fields in exactly the order write() emits them.
		timeStamp = in.readUTF();
		user = in.readUTF();
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(timeStamp);
		out.writeUTF(user);
	}

	@Override
	public String toString() {
		return user + "\t" + timeStamp;
	}

	/**
	 * Orders keys by timestamp descending; equal timestamps fall back to
	 * user ascending so that distinct (user, timestamp) pairs never compare
	 * equal (otherwise MapReduce would group them into one reduce call and
	 * silently drop records).
	 */
	@Override
	public int compareTo(MyValueWritable o) {
		// Reversed operands => descending chronological order. Lexicographic
		// comparison is valid because the format is zero-padded fixed width.
		int byTime = o.timeStamp.compareTo(this.timeStamp);
		if (byTime != 0) {
			return byTime;
		}
		return this.user.compareTo(o.user);
	}

	public String getUser() {
		return user;
	}

	public void setUser(String user) {
		this.user = user;
	}

	public String getTimeStamp() {
		return timeStamp;
	}

	public void setTimeStamp(String timeStamp) {
		this.timeStamp = timeStamp;
	}
}
TxtCounter_job.java
package mywork02;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.io.Text;
public class TxtCounter_job {
//获取数据并处理
// userA,2018-01-01 08:00:00
// userA,2018-01-01 09:00:00
// userA,2018-01-01 10:00:00
// userA,2018-01-01 11:00:00
//LongWritable为读取的偏移量,text为类型
public static class WorldCounterMap extends Mapper<LongWritable, Text, MyValueWritable, NullWritable>{
MyValueWritable mvw=new MyValueWritable();
protected void map(LongWritable key ,Text value,Context context) throws IOException,InterruptedException{
String [] strs=value.toString().split(",");
System.out.println(strs.length);
mvw.set(strs);
context.write(mvw, NullWritable.get());
}
}
public static class WordCountReduce extends Reducer<MyValueWritable, NullWritable, MyValueWritable, NullWritable>{
protected void reduce(MyValueWritable key,Iterable<NullWritable> values ,Context context)throws IOException,InterruptedException{
context.write(key, NullWritable.get());
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
String inputPath="hdfs://0.0.0.0:8020/input/";
String outputPath="hdfs://0.0.0.0:8020/output";
args=new String[] {inputPath,outputPath};
Configuration conf=new Configuration();
Job job=Job.getInstance(conf);
job.setJarByClass(TxtCounter_job.class);
job.setOutputKeyClass(MyValueWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setMapperClass(WorldCounterMap.class);
job.setReducerClass(WordCountReduce.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
原数据:
userA,2018-01-01 11:00:00
userA,2018-01-01 08:00:00
userA,2018-01-01 10:00:00
userA,2018-01-01 09:00:00
输出结果:
userA 2018-01-01 11:00:00
userA 2018-01-01 10:00:00
userA 2018-01-01 09:00:00
userA 2018-01-01 08:00:00