package com.sparktest; import java.io.Serializable; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.api.java.function.VoidFunction; import scala.Tuple2; import scala.math.Ordered; /** * 二次排序(按第一个值升序,按第二个值降序) * 部分原始数据: * 5 6 4 1 6 7 6 4 7 2 4 1 */ public class SecondSort { public static void main(String[] args) { SparkConf conf = new SparkConf() .setAppName("SecondSort") .setMaster("local"); JavaSparkContext sc = new JavaSparkContext(conf); JavaRDD<String> linesRDD = sc.textFile("d://spark_data//secondSort.txt"); /** * 把每一行转为tuple: (num1,num2) */ JavaPairRDD<MySortKey, String> tupleRDD = linesRDD.mapToPair(new PairFunction<String, MySortKey, String>() { @Override public Tuple2<MySortKey, String> call(String line) throws Exception { String data[] = line.split(" "); int num1 = Integer.parseInt(data[0]); int num2 = Integer.parseInt(data[1]); return new Tuple2<MySortKey, String>(new MySortKey(num1,num2),line); } }); JavaPairRDD<MySortKey, String> sortByKeyRDD = tupleRDD.sortByKey(); JavaRDD<String> sortedRDD = sortByKeyRDD.map(new Function<Tuple2<MySortKey,String>, String>() { @Override public String call(Tuple2<MySortKey, String> tuple) throws Exception { return tuple._2; } }); sortedRDD.foreach(new VoidFunction<String>() { @Override public void call(String line) throws Exception { System.out.println(line); } }); sc.close(); } } /** * 自定义排序key,按第一个值升序,按第二个值降序 */ class MySortKey implements Ordered<MySortKey>,Serializable{ /** * (域的意义、目的、功能) */ private static final long serialVersionUID = -8363444941537305530L; private int first; private int second; public MySortKey(int first,int second) { this.first = first; this.second = second; } public int getFirst() { return first; } public void setFirst(int first) { this.first = 
first; } public int getSecond() { return second; } public void setSecond(int second) { this.second = second; } @Override public boolean $greater(MySortKey other) { if(this.first > other.getFirst()){ return true; }else if(this.first == other.getFirst() && this.second < other.getSecond()){ return true; } return false; } @Override public boolean $greater$eq(MySortKey other) { if(this.$greater(other)){ return true; }else if(this.first == other.getFirst() && this.second == other.getSecond()){ return true; } return false; } @Override public boolean $less(MySortKey other) { return !this.$greater$eq(other); } @Override public boolean $less$eq(MySortKey other) { return !this.$greater(other); } @Override public int compare(MySortKey other) { if(this.first - other.first != 0){ return this.first - other.getFirst(); }else{ return -(this.second - other.getSecond()); } } @Override public int compareTo(MySortKey other) { if(this.first - other.first != 0){ return this.first - other.getFirst(); }else{ return -(this.second - other.getSecond()); } } }
// Spark二次排序(java)
// 猜你喜欢
// 转载自blog.csdn.net/js54100804/article/details/79972641
// 今日推荐
// 周排行