窗口操作就是把多个采集周期的数据合并成一个窗口一起计算,然后按照设置的滑动大小(滑动步长)向前滑动。
窗口大小和滑动大小都必须是采集周期的整数倍。
package date_10_17_SparkStreaming
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils
/**
 * Sliding-window word count over messages read from Kafka with Spark Streaming.
 *
 * Batches are collected every 3 seconds. Each printed result covers a
 * 6-second window that advances every 3 seconds, so both the window length
 * and the slide interval are multiples of the batch interval.
 */
object ss {

  /**
   * Splits a message into words on single spaces.
   *
   * A null payload yields no words instead of throwing a NullPointerException,
   * and the empty strings that `split(" ")` produces for leading or repeated
   * spaces (or for an empty message) are dropped so they are not counted as words.
   *
   * @param message raw message value; may be null
   * @return the non-empty tokens of `message`, in order
   */
  private def tokenize(message: String): Seq[String] =
    Option(message).fold(Seq.empty[String])(_.split(" ").filter(_.nonEmpty).toSeq)

  /**
   * Builds the streaming job, starts it, and blocks until it terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("wordCount").setMaster("local[*]")
    // Batch (collection) interval: 3 seconds.
    val streamingContext = new StreamingContext(conf, Seconds(3))
    streamingContext.checkpoint("cp")

    // Receiver-based Kafka stream of (key, message) pairs. Per the Spark Kafka 0.8
    // integration API the arguments are: ZooKeeper quorum, consumer group id, and a
    // map of topic name -> number of consumer threads.
    val kafkaStream =
      KafkaUtils.createStream(streamingContext, "chun1:2181", "chun", Map("chun" -> 3))

    // Window of two batches (6s) that slides forward one batch (3s) at a time.
    val windowDStream = kafkaStream.window(Seconds(6), Seconds(3))

    // Only the message value is tokenized; the Kafka key is ignored.
    val mapDStream = windowDStream
      .flatMap { case (_, message) => tokenize(message) }
      .map((_, 1))
    val resultDStream = mapDStream.reduceByKey(_ + _)

    resultDStream.print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}