Spark 核心编程(10)-Top N

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u012292754/article/details/86023645

1 TopN

1.1 对文件内数字,取最大的前 3 个

在这里插入图片描述

  • Java 版本
package topn;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.codehaus.janino.Java;
import scala.Tuple2;

import java.util.List;

/**
 * Prints the three largest integers found in a text file that holds one integer per line.
 *
 * <p>Each line is mapped to an (integer, line) pair, the pairs are sorted by key in
 * descending order, and the first three keys are collected to the driver and printed.
 */
public class Top3 {
    /**
     * Runs the job against {@code D:/topn.txt} using a local master.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("Top3").setMaster("local");

        // JavaSparkContext is Closeable: try-with-resources stops it even when the job throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> lines = sc.textFile("D:/topn.txt");

            JavaPairRDD<Integer, String> pairs = lines.mapToPair(new PairFunction<String, Integer, String>() {
                @Override
                public Tuple2<Integer, String> call(String s) throws Exception {
                    // Trim first so leading/trailing whitespace does not break number parsing.
                    return new Tuple2<>(Integer.valueOf(s.trim()), s);
                }
            });

            // false => descending order, so the largest numbers come first.
            JavaPairRDD<Integer, String> sortedPairs = pairs.sortByKey(false);

            // Only the numeric key is needed from here on.
            JavaRDD<Integer> sortedNums = sortedPairs.keys();

            List<Integer> list = sortedNums.take(3);
            for (Integer e : list) {
                System.out.println(e);
            }
        }
    }
}

  • Scala 版本
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Prints the three largest integers found in a text file that holds one integer per line.
 *
 * Each line becomes an (Int, line) pair; the pairs are sorted by key in descending
 * order and the first three keys are taken and printed.
 */
object Top3 {
    def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setAppName("Top3").setMaster("local")
        val sc = new SparkContext(conf)

        // Stop the context in `finally` so it is released even when the job fails.
        try {
            val lines = sc.textFile("D:/topn.txt")
            // Trim first so leading/trailing whitespace does not break number parsing.
            val pairs = lines.map(line => (line.trim.toInt, line))
            // ascending = false => largest numbers first.
            val sortedPairs = pairs.sortByKey(false)

            val sortedNums = sortedPairs.map(_._1)

            val top3 = sortedNums.take(3)

            top3.foreach(println)
        } finally {
            sc.stop()
        }
    }
}

在这里插入图片描述

2 对每个班级内的学生成绩,取出最高的前 3 个

  • 分组取 topN
    在这里插入图片描述

2.1 Java 版本

package topn;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.Iterator;
;

/**
 * Prints the highest scores of every class found in a text file.
 *
 * <p>Each input line is expected to hold a class name followed by an integer score,
 * separated by whitespace. Lines are grouped by class name and, for every class, up to
 * {@link #TOP_N} scores are kept in descending order and printed.
 */
public class GroupTop3 {
    /** Maximum number of scores reported per class. */
    private static final int TOP_N = 3;

    /**
     * Runs the job against {@code D:/scores.txt} using a local master.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("Top3").setMaster("local");

        // JavaSparkContext is Closeable: try-with-resources stops it even when the job throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> lines = sc.textFile("D:/scores.txt");

            JavaPairRDD<String, Integer> pairs = lines.mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) throws Exception {
                    // Split on any run of whitespace so tabs or repeated spaces are accepted.
                    String[] lineSplited = s.trim().split("\\s+");
                    if (lineSplited.length < 2) {
                        throw new IllegalArgumentException(
                                "Expected \"<class> <score>\" but got: \"" + s + "\"");
                    }
                    return new Tuple2<>(lineSplited[0], Integer.valueOf(lineSplited[1]));
                }
            });

            JavaPairRDD<String, Iterable<Integer>> groupedPairs = pairs.groupByKey();

            JavaPairRDD<String, Iterable<Integer>> top3Score = groupedPairs.mapToPair(new PairFunction<Tuple2<String, Iterable<Integer>>, String, Iterable<Integer>>() {
                @Override
                public Tuple2<String, Iterable<Integer>> call(Tuple2<String, Iterable<Integer>> classScores) throws Exception {
                    String className = classScores._1;
                    return new Tuple2<String, Iterable<Integer>>(className, topScores(classScores._2, TOP_N));
                }
            });

            top3Score.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
                @Override
                public void call(Tuple2<String, Iterable<Integer>> t) throws Exception {
                    System.out.println("class: " + t._1);

                    for (Integer score : t._2) {
                        System.out.println(score);
                    }

                    System.out.println("==============================");
                }
            });
        }
    }

    /**
     * Selects the largest scores from {@code scores}, in descending order.
     *
     * <p>The result holds at most {@code n} elements; when fewer than {@code n} scores are
     * supplied it holds only those scores, so it never contains {@code null} padding.
     *
     * @param scores the scores of one class
     * @param n maximum number of scores to keep
     * @return the kept scores, largest first
     */
    private static Iterable<Integer> topScores(Iterable<Integer> scores, int n) {
        Integer[] best = new Integer[n];
        int filled = 0;

        for (Integer score : scores) {
            for (int i = 0; i < n; i++) {
                if (best[i] == null) {
                    // Free slot at the tail: every earlier entry is >= score.
                    best[i] = score;
                    filled++;
                    break;
                } else if (score > best[i]) {
                    // Shift the smaller entries one slot to the right; the last one drops out
                    // when the array is already full.
                    for (int j = n - 1; j > i; j--) {
                        best[j] = best[j - 1];
                    }
                    best[i] = score;
                    if (filled < n) {
                        filled++;
                    }
                    break;
                }
            }
        }

        // Cut off the unused (null) tail so callers only see real scores.
        return Arrays.asList(Arrays.copyOf(best, filled));
    }
}

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/u012292754/article/details/86023645