Requirement:
For each class, find the three highest student scores (a grouped Top-N problem).
Implementation steps:
- Group the records by class and iterate over the groups, getting the scores of each group
- Convert each group's scores into a buffer array
- Sort the buffer array in descending order
- Take the first three elements of the sorted buffer array
- Print the output
/**
 * Grouped Top-N example: prints the three highest scores of every class.
 *
 * Every non-blank input line is expected to contain a class name followed by
 * an integer score, separated by whitespace, for example `class1 90`.
 */
object GroupTop3 {

  // Local import so the snippet is self-contained.
  import org.apache.spark.sql.SparkSession

  /** Input file used when no path is passed on the command line. */
  private val DefaultInputPath = "D:\\score.txt"

  /** How many of the highest scores are kept per class. */
  private val TopN = 3

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the input file path
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("GroupTop3")
      .master("local")
      .getOrCreate()

    try {
      val inputPath = args.headOption.getOrElse(DefaultInputPath)

      // Create the initial RDD, one element per text line.
      val lines = spark.sparkContext.textFile(inputPath)

      // Map every line to a (className, score) pair. Blank lines are skipped
      // because they carry no fields and would otherwise fail on indexing.
      val pairs = lines
        .map(_.trim)
        .filter(_.nonEmpty)
        .map { line =>
          val fields = line.split("\\s+")
          (fields(0), fields(1).toInt)
        }

      // Group the scores by class name.
      val groupedPairs = pairs.groupByKey()

      // For each class: copy the scores into a buffer, sort it in descending
      // order and keep the first TopN entries.
      val top3Score = groupedPairs.map { case (className, scores) =>
        val top3 = scores.toBuffer.sortWith(_ > _).take(TopN)
        (className, top3)
      }

      // Bring the (small) result to the driver before printing so the output
      // appears on the driver console regardless of where the tasks ran.
      top3Score.collect().foreach { case (className, scores) =>
        println(className)
        scores.foreach(println)
        println("------------------")
      }
    } finally {
      // Always release the Spark resources, even when the job fails.
      spark.stop()
    }
  }
}