Custom Sorting II: sorting with a case class to avoid explicit serialization

Previous post: Custom Sorting I

package com.ws.demo
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Custom sort, version 2
  */
object CustomSort2 {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort2").setMaster("local[4]")

    val sc = new SparkContext(conf)

    val data = Array("ws 18 150", "tt 19 145", "nn 20 130", "bb 16 120", "cb 19 150")

    val dataRdd: RDD[String] = sc.parallelize(data)

    //Split each line into a (name, age, score) tuple
    val dataMapRDD: RDD[(String,Int,Int)] = dataRdd.map(line => {
      val lineArr = line.split(" ")
      val name = lineArr(0)
      val age = lineArr(1).toInt
      val score = lineArr(2).toInt
      (name,age,score)
    })

    //Sort by passing in an ordering key; the data structure itself is unchanged
    val sort: RDD[(String, Int, Int)] = dataMapRDD.sortBy(p => Person(p._2,p._3))

    println(sort.collect().toBuffer)

    sc.stop()
  }
}

/**
  * With a case class, no explicit Serializable implementation is needed for the
  * sort key (case classes are serializable by default). This is more convenient
  * than the previous version, and only the fields used for ordering are required.
  */
case class Person(age: Int, score: Int) extends Ordered[Person]{

  //Comparison rule: higher score first, then younger age first on ties
  override def compare(that: Person): Int = {
    if (this.score == that.score) {
      this.age - that.age
    } else {
      that.score - this.score
    }
  }
}
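
For contrast, here is a minimal sketch of what the same ordering might look like without a case class (the name PersonV1 is a hypothetical stand-in, not taken from the earlier post). A plain class used as a sortBy key has to mix in Serializable explicitly, because the keys are serialized when Spark samples and shuffles them:

class PersonV1(val age: Int, val score: Int)
  extends Ordered[PersonV1] with Serializable {

  //Same rule: higher score first, younger age first on ties
  override def compare(that: PersonV1): Int = {
    if (this.score == that.score) this.age - that.age
    else that.score - this.score
  }
}

The case class version above gets Serializable, equals, hashCode and a concise constructor for free, which is why it is the shorter of the two.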

Result:

ArrayBuffer((ws,18,150), (cb,19,150), (tt,19,145), (nn,20,130), (bb,16,120))
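
As a side note, the same ordering can also be expressed without any custom class by sorting on a tuple key. This is a general Spark/Scala idiom rather than part of the original example; placed inside main where dataMapRDD is in scope, it produces the same output:

    //Negate the score for descending order; the built-in tuple Ordering
    //then sorts ascending by age on ties
    val sortedByTuple: RDD[(String, Int, Int)] = dataMapRDD.sortBy(p => (-p._3, p._2))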

Reposted from blog.csdn.net/bb23417274/article/details/82937792