版权声明:未经同意,不得转载。 https://blog.csdn.net/qq_36235275/article/details/82503372
例如各大商城实时显示数据的案例:
package com.jiangnan.spark
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Spark Streaming example that keeps a running word count across batches.
 *
 * Lines of text are read from a TCP socket, split on single spaces, and the per-word
 * totals accumulated since the job started are maintained with `updateStateByKey`
 * and printed once per batch.
 */
object TestStreamUpdate {

  /**
   * Merges the counts that arrived in the current batch into the running total of one word.
   *
   * @param values counts observed for the word in the current batch
   * @param state  total accumulated by earlier batches, or `None` if the word has no state yet
   * @return the new running total; always defined, because returning `None` from an
   *         `updateStateByKey` function removes the key from the state
   */
  private def updateCount(values: Seq[Int], state: Option[Int]): Option[Int] =
    Some(values.sum + state.getOrElse(0))

  /**
   * Entry point: builds the streaming pipeline, starts it, and blocks until it terminates.
   *
   * An explicit `main` is used instead of `extends App` because the Spark documentation
   * warns that subclasses of `scala.App` may not work correctly (their body is initialised
   * lazily, which interacts badly with closures shipped to executors).
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Configuration object. At least two local threads are needed: the socket receiver
    // occupies one, and the remaining one processes the received batches.
    val conf = new SparkConf().setAppName("TestStreamUpdate").setMaster("local[2]")

    // Create the StreamingContext with a 5-second batch interval.
    val ssc = new StreamingContext(conf, Seconds(5))

    // Important: stateful operations such as updateStateByKey require a checkpoint
    // directory. StreamingContext.checkpoint is the documented way to configure it.
    ssc.checkpoint("E:\\cd\\data")

    // Receive text lines from the socket.
    val lineDStream = ssc.socketTextStream("master", 8888)
    val words = lineDStream.flatMap(_.split(" ")).map(word => (word, 1))

    // Use updateStateByKey to carry state across batches, i.e. count every word
    // seen since the application started.
    val state = words.updateStateByKey[Int](updateCount _)

    state.print()
    ssc.start()
    ssc.awaitTermination()
  }
}