总体流程
1.在linux上开启nc服务,作为数据源
2.利用scala开发flink词频统计服务
3.统计结果落入mysql数据库
安装nc服务
# Install the netcat (nc) utility.
yum install nc
# Listen on TCP port 7777; the Flink job reads its input lines from this port.
nc -l 7777
本地词频统计服务
package com.gt.wordcount
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import java.sql.{Connection, DriverManager, PreparedStatement}
/**
 * Streaming word-count job.
 *
 * Reads text lines from a socket, splits them into words, counts each word
 * over 3-second tumbling processing-time windows and writes every
 * (word, count) result to MySQL through [[MySqlSink]].
 */
object StreamWordCount {

  /**
   * Builds the pipeline and starts the job.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // 1. Create the stream execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Source: text lines read from the socket at hadoop001:7777.
    val inputDataStream: DataStream[String] = env.socketTextStream("hadoop001", 7777)

    // 3. Transformation: split on whitespace, drop empty tokens, pair each word with 1.
    val wcStream = inputDataStream
      .flatMap(_.split("\\s"))
      .filter(_.nonEmpty)
      .map((_, 1))
      // Key by the word with a typed selector instead of the positional keyBy(0):
      // the grouping is the same, but the key type is String and checked at compile time.
      .keyBy(_._1)
      .window(TumblingProcessingTimeWindows.of(Time.seconds(3)))
      .sum(1)

    // 4. Sink: insert every windowed (word, count) pair into MySQL.
    wcStream.addSink(new MySqlSink)

    // 5. Launch the job.
    env.execute("Stream Word Count")
  }
}
/**
 * Custom MySQL sink that inserts each (word, count) pair as one row of the
 * `word` table.
 *
 * Expected table:
 * {{{
 * CREATE TABLE word (
 *   word varchar(40) comment 'word',
 *   cnt int(20) comment 'word count'
 * ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
 * }}}
 */
class MySqlSink extends RichSinkFunction[(String, Int)] {
  // JDBC handles are created in open() and released in close(); they stay
  // null until open() has run.
  var connection: Connection = null
  var ps: PreparedStatement = null

  /**
   * Loads the MySQL driver, opens the connection and prepares the insert statement.
   *
   * @param parameters configuration passed in by Flink (not used here)
   */
  override def open(parameters: Configuration): Unit = {
    // 1. Load the MySQL JDBC driver.
    Class.forName("com.mysql.jdbc.Driver")
    // 2. Open the connection.
    connection = DriverManager.getConnection("jdbc:mysql://hadoop002/flink?useSSL=false", "root", "hadoop")
    // 3. Prepare the insert statement once so invoke() can reuse it for every record.
    val sql = "insert into word(word,cnt) values(?,?)"
    ps = connection.prepareStatement(sql)
  }

  /**
   * Inserts one record.
   *
   * @param value the (word, count) pair to write
   */
  override def invoke(value: (String, Int)): Unit = {
    ps.setString(1, value._1)
    ps.setInt(2, value._2)
    ps.executeUpdate()
  }

  /**
   * Releases the JDBC resources.
   *
   * The statement is closed before the connection that owns it, and the
   * connection is closed in `finally` so it is released even when closing
   * the statement throws.
   */
  override def close(): Unit = {
    try {
      if (ps != null) {
        ps.close()
      }
    } finally {
      if (connection != null) {
        connection.close()
      }
    }
  }
}
mysql表结构
-- Target table for the word-count results: one row per (word, count) insert.
CREATE TABLE word
(
    word VARCHAR(100) NULL, -- the word
    cnt  INT          NULL  -- number of occurrences
);
运行程序,然后在nc端输入数据,再到mysql中查询word表查看结果。注意:每个3秒窗口的统计结果都会作为新行插入,因此同一个单词可能出现多行。