// 上一篇 : 自定义排序 I (Previous post: Custom Sort I)
package com.ws.demo
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
* 自定义排序2
*/
object CustomSort2 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort2").setMaster("local[4]")
    val sc = new SparkContext(conf)

    // Raw records: "name age score", space-separated.
    val records = Array("ws 18 150", "tt 19 145", "nn 20 130", "bb 16 120", "cb 19 150")
    val rawRdd: RDD[String] = sc.parallelize(records)

    // Parse each line into a (name, age, score) tuple.
    val parsedRdd: RDD[(String, Int, Int)] = rawRdd.map { line =>
      val fields = line.split(" ")
      (fields(0), fields(1).toInt, fields(2).toInt)
    }

    // sortBy maps each tuple to a Person key; Person's Ordered instance
    // supplies the ordering rule while the tuple structure stays unchanged.
    val sortedRdd: RDD[(String, Int, Int)] = parsedRdd.sortBy(t => Person(t._2, t._3))

    println(sortedRdd.collect().toBuffer)
    sc.stop()
  }
}
/**
 * Sort key for (name, age, score) records.
 *
 * As a case class, Person is `Serializable` automatically, so Spark can ship
 * it to executors without an explicit `extends Serializable` — simpler than
 * the previous example, and it only carries the fields the ordering needs.
 *
 * Ordering: higher score first; on equal scores, smaller age first.
 */
case class Person(age: Int, score: Int) extends Ordered[Person] {
  override def compare(that: Person): Int = {
    if (this.score == that.score) {
      // Tie-break: ascending age. compareTo avoids the overflow that
      // `this.age - that.age` has near Int.MinValue / Int.MaxValue.
      this.age.compareTo(that.age)
    } else {
      // Descending score: compare `that` against `this` instead of
      // negating a subtraction, which can overflow and flip the sign.
      that.score.compareTo(this.score)
    }
  }
}
// 结果 (Result) :
// ArrayBuffer((ws,18,150), (cb,19,150), (tt,19,145), (nn,20,130), (bb,16,120))