flink实现简单的词频统计实验

总体流程

1.在Linux上开启nc服务,作为数据源
2.利用Scala开发Flink词频统计服务
3.将统计结果写入MySQL数据库

安装nc服务

# Install the netcat (nc) utility via yum
yum install nc
# Listen on TCP port 7777 (the port used by the Flink job's socketTextStream source)
nc -l 7777

本地词频统计服务

package com.gt.wordcount
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time


import java.sql.{Connection, DriverManager, PreparedStatement}


/**
 * Streaming word count job.
 *
 * Reads text lines from a socket, splits them into words, counts each word
 * within 3-second tumbling processing-time windows, and hands every
 * `(word, count)` result to [[MySqlSink]].
 */
object StreamWordCount {

  /**
   * Builds the pipeline and starts the job.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    // 1. Create the stream execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Source: text lines read from the socket at hadoop001:7777
    val inputDataStream: DataStream[String] = env.socketTextStream("hadoop001", 7777)

    // 3. Transformation: split into words, drop empty tokens, count per word per window
    val wcStream = inputDataStream
      .flatMap(_.split("\\s"))
      .filter(_.nonEmpty)
      .map((_, 1))
      // Key by the word itself with a key-selector function; positional keying
      // (keyBy(0)) is deprecated and yields an untyped Tuple key.
      .keyBy(_._1)
      .window(TumblingProcessingTimeWindows.of(Time.seconds(3)))
      .sum(1)

    // 4. Sink: write each windowed (word, count) result through MySqlSink
    wcStream.addSink(new MySqlSink)

    // 5. Submit and run the job
    env.execute("Stream Word Count")
  }
}


/**
 * Custom MySQL sink: inserts every incoming `(word, count)` record into the
 * `word` table over JDBC.
 *
 * One connection and one prepared statement are created in `open` and reused
 * for all records until `close` is called.
 *
 * Example target table:
 *
 * CREATE TABLE word (
 *    word varchar(40) comment 'the word',
 *    cnt  int(20)      comment 'number of occurrences'
 *  ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
 *
 */
class MySqlSink extends RichSinkFunction[(String, Int)] {

  // Both are assigned in open(); they remain null if open() never ran or failed early.
  var connection: Connection = null
  var ps: PreparedStatement = null

  /**
   * Loads the MySQL JDBC driver, opens the connection and prepares the insert statement.
   *
   * @param parameters configuration passed in by the caller (not read here)
   */
  override def open(parameters: Configuration): Unit = {
    // 1. Load the MySQL driver
    Class.forName("com.mysql.jdbc.Driver")
    // 2. Open the connection
    connection = DriverManager.getConnection("jdbc:mysql://hadoop002/flink?useSSL=false", "root", "hadoop")
    // 3. Prepare the parameterized insert statement once so invoke() can reuse it
    val sql = "insert into word(word,cnt) values(?,?)"
    ps = connection.prepareStatement(sql)
  }

  /**
   * Inserts one row for the given record.
   *
   * @param value the `(word, count)` pair to write
   */
  override def invoke(value: (String,Int)): Unit = {
    ps.setString(1, value._1)
    ps.setInt(2, value._2)
    ps.executeUpdate()
  }

  /**
   * Releases the JDBC resources.
   *
   * The statement is closed before the connection that owns it, and the
   * connection is closed in `finally` so it is released even if closing the
   * statement throws.
   */
  override def close(): Unit = {
    try {
      if (ps != null) {
        ps.close()
      }
    } finally {
      if (connection != null) {
        connection.close()
      }
    }
  }
}

mysql表结构

-- Target table for the word-count results; the sink inserts one (word, cnt) row per record.
create table word
(
    word varchar(100) null, -- the word
    cnt  int          null  -- the count written for that word
);

运行程序,然后在nc端输入数据,再到MySQL中查看word表里的统计结果

a5d1e21235d3af50fc71b6e31f8eded3.png

51a7055918cd444ccc3fb0e06dd8b441.png

猜你喜欢

转载自blog.csdn.net/dot_life/article/details/123366952