SparkStreaming - 窗口函数(窗口操作)

窗口操作就是把多个采集周期的数据合并成一个窗口一起计算,然后按照设置的滑动步长向前滑动。

注意:窗口大小和滑动步长都必须是采集周期的整数倍。

package date_10_17_SparkStreaming

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils

/**
 * Windowed word count over a Kafka-backed Spark Streaming source.
 *
 * Batches are collected every 3 seconds; each output covers a 6-second
 * window that slides forward by 3 seconds, so consecutive windows overlap
 * by one batch.
 */
object ss {

  /**
   * Builds the streaming pipeline, starts it and blocks until it terminates.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    // Plain-Scala analogue of windowing, kept for reference:
    //   val ints = List(1,2,3,4,5)
    //   val ites = ints.sliding(2,2)
    //   for (list <- ites){
    //     println(list.mkString(","))
    //   }

    // Streaming context with a 3-second batch (collection) interval.
    val sparkConf = new SparkConf()
      .setAppName("wordCount")
      .setMaster("local[*]")
    val ssc = new StreamingContext(sparkConf, Seconds(3))

    // Checkpoint data is written under the relative directory "cp".
    ssc.checkpoint("cp")

    // Kafka source. Per the KafkaUtils.createStream API the arguments are
    // the ZooKeeper quorum, the consumer group id and a topic -> thread-count map.
    val kafkaRecords =
      KafkaUtils.createStream(ssc, "chun1:2181", "chun", Map("chun" -> 3))

    // Window length (6s) and slide interval (3s) must both be multiples of
    // the batch interval (3s).
    val windowedRecords = kafkaRecords.window(Seconds(6), Seconds(3))

    // Word count: split the second element of each record (presumably the
    // message payload) on single spaces, pair every word with 1, sum per word.
    val wordCounts = windowedRecords
      .flatMap(record => record._2.split(" "))
      .map(word => (word, 1))
      .reduceByKey((left, right) => left + right)

    wordCounts.print()

    // Start receiving and processing data.
    ssc.start()

    // Keep the driver alive until the streaming computation stops.
    ssc.awaitTermination()
  }

}

发布了83 篇原创文章 · 获赞 61 · 访问量 9184

猜你喜欢

转载自blog.csdn.net/weixin_43736084/article/details/102611196
今日推荐