foreachRDD:用于把数据写出到外部存储
- 1.需求
使用foreachRDD写入到MySQL中 - 2.代码实现
import java.util.Properties
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{
Seconds, StreamingContext}
object OutDemo1 {
  // JDBC credentials for the target MySQL instance.
  // NOTE(review): credentials are hard-coded in source; externalize for real use.
  val props = new Properties()
  props.setProperty("user", "root")
  props.setProperty("password", "root")

  /**
   * Streaming word count that writes the running totals of each batch
   * to MySQL via foreachRDD + DataFrame JDBC output.
   */
  def main(args: Array[String]): Unit = {
    // Local streaming context with a 3-second batch interval.
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Transform")
    val ssc = new StreamingContext(sparkConf, Seconds(3))
    // A checkpoint directory is required by updateStateByKey.
    ssc.checkpoint("ck2")

    val wordCounts = ssc
      .socketTextStream("hadoop102", 9999)
      .flatMap(_.split("\\W+"))
      .map((_, 1))
      // Maintain a running total per word across batches:
      // sum of this batch's counts plus the previous state (0 if absent).
      .updateStateByKey((batchCounts: Seq[Int], state: Option[Int]) =>
        Some(batchCounts.sum + state.getOrElse(0)))

    // Alternative (not used here): rdd.foreachPartition with a manually
    // managed JDBC connection per partition — open, write, close.

    wordCounts.foreachRDD { rdd =>
      // Obtain (or lazily create) a SparkSession so the RDD can be
      // converted to a DataFrame on the driver.
      val spark = SparkSession.builder()
        .config(rdd.sparkContext.getConf)
        .getOrCreate()
      import spark.implicits._
      val df = rdd.toDF("word", "count")
      // Each batch rewrites the whole table with the latest running totals,
      // so "overwrite" is the intended mode here.
      df.write.mode("overwrite").jdbc("jdbc:mysql://hadoop102:3306/test", "word1602", props)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
- hadoop102上启动 netcat
nc -lk 9999
- 查看结果