MapReduce 实现对时间的简单排序
首先 MapReduce在处理数据的过程中会对数据排序(map输出的kv对传输到reduce之前会排序),排序的依据是map输出的key。
因此如果要改变排序规则,就要将key位置的值进行修改,具体做法是使用一个实现了 WritableComparable 接口的 bean 对象作为 map 输出的 key,并在其 compareTo 方法中定义排序逻辑
下面为实例代码
MyValueWritable.java
package mywork02;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite MapReduce key holding a (user, timestamp) pair, sorted by
 * timestamp in descending order (newest first), with user as a tie-breaker.
 *
 * <p>The timestamp is expected in the fixed-width format
 * {@code "yyyy-MM-dd HH:mm:ss"}, so plain lexicographic comparison of the
 * strings is chronological — no date parsing is needed. This also makes
 * {@link #compareTo} exception-free and thread-safe, unlike the previous
 * {@code SimpleDateFormat}-based version (SimpleDateFormat is not
 * thread-safe and its ParseException was being swallowed, returning a
 * sentinel value that violated the compareTo contract).
 *
 * @author wuhon
 */
public class MyValueWritable implements WritableComparable<MyValueWritable> {
	private String user;
	// Expected format: "yyyy-MM-dd HH:mm:ss" — TODO confirm all input rows obey it.
	private String timeStamp;

	/**
	 * Populates both fields from a pre-split input line.
	 *
	 * @param split at least two elements: [0] = user, [1] = timestamp
	 */
	public void set(String[] split) {
		this.user = split[0];
		this.timeStamp = split[1];
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		// Must read fields in exactly the order write() emits them.
		timeStamp = in.readUTF();
		user = in.readUTF();
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(timeStamp);
		out.writeUTF(user);
	}

	@Override
	public String toString() {
		return user + "\t" + timeStamp;
	}

	/**
	 * Orders keys by timestamp descending; equal timestamps fall back to
	 * user ascending so that distinct (user, timestamp) pairs never compare
	 * equal (otherwise MapReduce would group them into one reduce call and
	 * silently drop records).
	 */
	@Override
	public int compareTo(MyValueWritable o) {
		// Reversed operands => descending chronological order. Lexicographic
		// comparison is valid because the format is zero-padded fixed width.
		int byTime = o.timeStamp.compareTo(this.timeStamp);
		if (byTime != 0) {
			return byTime;
		}
		return this.user.compareTo(o.user);
	}

	public String getUser() {
		return user;
	}

	public void setUser(String user) {
		this.user = user;
	}

	public String getTimeStamp() {
		return timeStamp;
	}

	public void setTimeStamp(String timeStamp) {
		this.timeStamp = timeStamp;
	}
}
TxtCounter_job.java
package mywork02;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.io.Text;
public class TxtCounter_job {
//获取数据并处理
// userA,2018-01-01 08:00:00
// userA,2018-01-01 09:00:00
// userA,2018-01-01 10:00:00
// userA,2018-01-01 11:00:00
//LongWritable为读取的偏移量,text为类型
public static class WorldCounterMap extends Mapper<LongWritable, Text, MyValueWritable, NullWritable>{
MyValueWritable mvw=new MyValueWritable();
protected void map(LongWritable key ,Text value,Context context) throws IOException,InterruptedException{
String [] strs=value.toString().split(",");
System.out.println(strs.length);
mvw.set(strs);
context.write(mvw, NullWritable.get());
}
}
public static class WordCountReduce extends Reducer<MyValueWritable, NullWritable, MyValueWritable, NullWritable>{
protected void reduce(MyValueWritable key,Iterable<NullWritable> values ,Context context)throws IOException,InterruptedException{
context.write(key, NullWritable.get());
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
String inputPath="hdfs://0.0.0.0:8020/input/";
String outputPath="hdfs://0.0.0.0:8020/output";
args=new String[] {inputPath,outputPath};
Configuration conf=new Configuration();
Job job=Job.getInstance(conf);
job.setJarByClass(TxtCounter_job.class);
job.setOutputKeyClass(MyValueWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setMapperClass(WorldCounterMap.class);
job.setReducerClass(WordCountReduce.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
原数据:
userA,2018-01-01 11:00:00
userA,2018-01-01 08:00:00
userA,2018-01-01 10:00:00
userA,2018-01-01 09:00:00
输出结果:
userA 2018-01-01 11:00:00
userA 2018-01-01 10:00:00
userA 2018-01-01 09:00:00
userA 2018-01-01 08:00:00