Spark核心编程-WordCount

一、Java版

/**
 * Word count example that runs Spark in local mode.
 *
 * <p>Builds an in-memory RDD of two lines, splits each line into words on a
 * single space, maps every word to a {@code (word, 1)} pair, sums the counts
 * per word and prints one line per distinct word.
 */
public class WordCountLocal {

  /**
   * Entry point: runs the word count job and prints the result.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("wordCountLocal").setMaster("local");
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
      // Data source: a small in-memory collection of lines.
      JavaRDD<String> sourceRdd = sc.parallelize(Arrays.asList("hello world", "hello you"));

      // Split every line into words on a single space.
      // NOTE(review): an Iterable return type matches the Spark 1.x FlatMapFunction
      // contract; Spark 2.x+ expects an Iterator here - confirm the target version.
      JavaRDD<String> splitedRdd = sourceRdd.flatMap(new FlatMapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        public Iterable<String> call(String row) throws Exception {
          return Arrays.asList(row.split(" "));
        }
      });

      // Map: pair each word with an initial count of 1.
      JavaPairRDD<String, Integer> mapRdd = splitedRdd
          .mapToPair(new PairFunction<String, String, Integer>() {
            private static final long serialVersionUID = 1L;

            public Tuple2<String, Integer> call(String row) throws Exception {
              return new Tuple2<String, Integer>(row, 1);
            }
          });

      // Reduce: add up the counts of identical words.
      JavaPairRDD<String, Integer> reduceRdd = mapRdd
          .reduceByKey(new Function2<Integer, Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            public Integer call(Integer v1, Integer v2) throws Exception {
              return v1 + v2;
            }
          });

      // Print every (word, count) pair.
      reduceRdd.foreach(new VoidFunction<Tuple2<String, Integer>>() {
        private static final long serialVersionUID = 1L;

        public void call(Tuple2<String, Integer> t) throws Exception {
          System.out.println("word:" + t._1 + ",count:" + t._2);
        }
      });
    } finally {
      // Close the context even when a transformation or action above throws,
      // so the local Spark runtime is always shut down.
      sc.close();
    }
  }
}

二、Scala版

// Word count driver: reads a text file from HDFS, splits each line into words
// on a single space, counts the occurrences of every word and prints one line
// per distinct word. No master is set here, so it has to be supplied by the
// launcher (e.g. spark-submit).
val conf = new SparkConf()
  .setAppName("WordCount")
val sc = new SparkContext(conf)

try {
  // Read the input as an RDD of lines; the second argument is the minimum
  // number of partitions requested.
  val lines = sc.textFile("hdfs://spark1:9000/spark.txt", 1)
  val words = lines.flatMap(line => line.split(" "))
  val pairs = words.map(word => (word, 1))
  val wordCounts = pairs.reduceByKey(_ + _)

  // NOTE(review): RDD.foreach runs on the executors, so on a cluster these
  // lines go to the executors' stdout rather than the driver console.
  wordCounts.foreach { case (word, count) => println(s"$word appeared $count times.") }
} finally {
  // Stop the context even if the job fails, so its resources are released.
  sc.stop()
}

三、原理剖析

猜你喜欢

转载自blog.csdn.net/Anbang713/article/details/81588697