RDD
CalculateScoreSum.scala
package net.sherry.rdd
import org.apache.spark.{
SparkConf, SparkContext}
/** Spark job: sums per-student scores from pre-flattened (name, score) pairs. */
object CalculateScoreSum {
  def main(args: Array[String]): Unit = {
    // Local-mode Spark context using all available cores.
    val sparkConf = new SparkConf()
      .setAppName("CalculateScoreSum")
      .setMaster("local[*]")
    val context = new SparkContext(sparkConf)

    // Each student appears three times, once per subject score.
    val scoreRecords = List(
      ("张钦林", 78), ("张钦林", 90), ("张钦林", 76),
      ("陈燕文", 95), ("陈燕文", 88), ("陈燕文", 98),
      ("卢志刚", 78), ("卢志刚", 80), ("卢志刚", 60)
    )

    // Aggregate by student name and print each (name, total) pair.
    context.makeRDD(scoreRecords)
      .reduceByKey(_ + _)
      .collect
      .foreach(println)
  }
}
CalculateScoreSum02.scala
package net.sherry.rdd
import org.apache.spark.{
SparkConf, SparkContext}
import scala.collection.mutable.ListBuffer
/** Spark job: sums per-student scores from wide (name, s1, s2, s3) rows.
  *
  * Fix: replaced the mutable ListBuffer + side-effecting foreach with a pure
  * flatMap that emits the same (name, score) pairs in the same order.
  */
object CalculateScoreSum02 {
  def main(args: Array[String]): Unit = {
    // Local-mode Spark context using all available cores.
    val conf = new SparkConf()
      .setAppName("CalculateScoreSum")
      .setMaster("local[*]")
    val sc = new SparkContext(conf)

    // One row per student: (name, score1, score2, score3).
    val scores = List(
      ("张钦林", 78, 90, 76),
      ("陈燕文", 95, 88, 98),
      ("卢志刚", 78, 80, 60)
    )

    // Flatten each wide row into three (name, score) pairs.
    val newScores = scores.flatMap { score =>
      List((score._1, score._2), (score._1, score._3), (score._1, score._4))
    }

    // Aggregate by student name and print each (name, total) pair.
    val rdd1 = sc.makeRDD(newScores)
    val rdd2 = rdd1.reduceByKey((x, y) => x + y)
    rdd2.collect.foreach(println)
  }
}
WordCount.scala
package net.sherry.rdd
import org.apache.spark.{
SparkConf, SparkContext}
/** Spark job: word count with descending-frequency sort.
  *
  * Usage: zero args (use the default HDFS paths) or two args (input, output).
  *
  * Bug fix: the original declared NEW locals `val inputPath`/`val outputPath`
  * inside the zero-arg branch, shadowing the outer `var`s, so the outer paths
  * stayed "" and `sc.textFile("")` failed. Paths are now computed as a single
  * expression. The SparkContext is also stopped on the bad-argument early
  * return so the JVM does not keep the context alive.
  */
object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("SparkRDDWordCount")
      .setMaster("local[*]")
    val sc = new SparkContext(conf)

    // Resolve (input, output) from the command line; defaults target HDFS.
    val (inputPath, outputPath) = args.length match {
      case 0 =>
        ("hdfs://hadoop102:8020/wordcount/input/words.txt",
         "hdfs://hadoop102:8020/wordcount/output")
      case 2 =>
        (args(0), args(1))
      case _ =>
        // Friendly reminder: the argument count must be 0 or 2.
        println("温馨提示:命令提示行参数个数只能是0或2~")
        sc.stop()
        return
    }

    // Split lines into words, count each word, sort by count descending.
    val wc = sc.textFile(inputPath)
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, false)

    wc.collect.foreach(println)
    wc.saveAsTextFile(outputPath)
    sc.stop()
  }
}