import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

object StructuredStreamingDemo {
  def main(args: Array[String]): Unit = {
    // Create the program entry point: a SparkSession
    val sparksession = SparkSession.builder()
      .appName("StructuredStreamingDemo")
      .master("local[2]")
      .getOrCreate()
    import sparksession.implicits._

    // Read a streaming DataFrame from a socket source
    val lines: DataFrame = sparksession.readStream
      .format("socket")
      .option("host", "192.168.88.130")
      .option("port", "6666")
      .load()

    // Convert to Dataset[String], split each line into words, and count by word
    val lineds: Dataset[String] = lines.as[String]
    val res = lineds.flatMap(_.split(" ")).groupBy("value").count()

    // Write the running word counts to the console
    val res2 = res.writeStream
      .outputMode("complete")
      .format("console")
      .start()
    res2.awaitTermination()
  }
}
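To try the demo, start a simple socket server on the host and port the code reads from (192.168.88.130:6666 here, which is specific to this example's environment; adjust both to match your own machine), for example with netcat:

nc -lk 6666

Then type space-separated words into the netcat session. Because the query uses complete output mode, the console sink prints the entire table of running word counts for every micro-batch, so counts for previously seen words keep accumulating across batches.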