Flink示例——Table、SQL

Flink示例——Table、SQL

版本信息

产品 版本
Flink 1.7.2
Java 1.8.0_231
Scala 2.11.12

Maven依赖

  • pom.xml 依赖部分
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Table、SQL -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- TableAPI的代码由Scala编写,因此必须导入相关包 -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-scala_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-scala_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    

自定义SourceFunction

  • 提供一个SourceFunction,方便后面测试
    /**
     * Test source that emits one synthetic CSV record roughly every 100 ms until cancelled.
     *
     * <p>Each record has the form {@code id,name,age,address,eventTime}, where {@code id} is an
     * increasing counter, {@code name} and {@code address} are picked at random from fixed lists,
     * {@code age} lies in {@code [10, 29]} and {@code eventTime} is the current wall-clock time
     * shifted by a random whole number of seconds.
     */
    public class CustomSourceFunction extends RichSourceFunction<String> {

        // Written by cancel() and read by the emitting loop in run(). Flink's SourceFunction
        // contract has cancel() invoked from a different thread than run(), so the flag must be
        // volatile for the write to be reliably visible to the loop.
        private volatile boolean flag = true;

        // Next record id; only touched from run().
        private long idAdder = 0L;

        /**
         * Emits records in a loop until {@link #cancel()} clears the running flag.
         *
         * @param ctx context used to emit the generated records
         * @throws Exception if the sleep between two records is interrupted
         */
        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            List<String> nameList = Arrays.asList("xiaowang", "lilei", "yangyang", "zhangsan", "lisi", "wangwu", "meimei");
            List<String> addressList = Arrays.asList("beijing", "chongqing", "shanghai", "nanjing", "chengdu", "guangzhou");
            Random random = new Random();
            while (flag) {
                Thread.sleep(100);

                // Simulated record: id,name,age,address,eventTime
                long id = idAdder++;
                String name = nameList.get(random.nextInt(nameList.size()));
                int age = random.nextInt(20) + 10;
                String address = addressList.get(random.nextInt(addressList.size()));
                // Simulated business (event) time: "now" shifted by a random number of whole
                // seconds centred around zero (the offset range is derived from the name-list size).
                long eventTime = System.currentTimeMillis() + (random.nextInt(nameList.size()) - nameList.size() / 2) * 1000;

                ctx.collect(id + "," + name + "," + age + "," + address + "," + eventTime);
            }
        }

        /** Requests termination of the emitting loop in {@link #run}. */
        @Override
        public void cancel() {
            flag = false;
        }

    }
    

TableAPI、SQL 简单示例

  • 使用Tuple方式转换DataStream(较繁琐)
    /**
     * Minimal Table API example: parses the CSV lines produced by {@code CustomSourceFunction}
     * into {@code Tuple5} records, converts the stream into a {@code Table}, filters it by age
     * and prints the selected columns.
     */
    public class SimpleDemo {

        public static void main(String[] args) {
            // Streaming environment with parallelism 3, plus the table environment on top of it
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            env.setParallelism(3);
            StreamTableEnvironment tableEnv = StreamTableEnvironment.getTableEnvironment(env);

            // Custom test source emitting CSV lines
            DataStreamSource<String> customDS = env.addSource(new CustomSourceFunction());

            // Tuple-based conversion (rather verbose).
            // The explicit returns(...) type hint is mandatory for the lambda.
            DataStream<Tuple5<Long, String, Integer, String, Long>> personDS = customDS
                    .map(line -> {
                        // Columns: id, name, age, address, eventTime
                        String[] cols = line.split(",");
                        return Tuple5.of(
                                Long.parseLong(cols[0]),
                                cols[1],
                                Integer.parseInt(cols[2]),
                                cols[3],
                                Long.parseLong(cols[4]));
                    })
                    .returns(Types.TUPLE(Types.LONG, Types.STRING, Types.INT, Types.STRING, Types.LONG));

            // Convert the DataStream into a Table and print its schema
            Table srcTable = tableEnv.fromDataStream(personDS, "id, name, age, address, eventTime");
            srcTable.printSchema();

            // Author's note: the expression syntax here differs from standard SQL --
            // the number 20 must not be written as '20', and && must not be written as AND.
            Table adults = srcTable.filter("age > 20 && age < 25");
            Table table = adults.select("name, age, address");

            // Convert the Table back into a DataStream and print every row
            DataStream<Row> resultDS = tableEnv.toAppendStream(table, Row.class);
            resultDS.print();

            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

    }
    
  • 使用POJO方式转换DataStream(需要定义POJO类)
    • POJO类 Person
      /**
       * POJO describing one person record with the columns
       * {@code id, name, age, address, eventTime}.
       */
      public class Person {

          // Fields must be public (requirement noted by the original author for POJO usage)
          public long id;
          public String name;
          public int age;
          public String address;
          public long eventTime;

          /** A public no-argument constructor must be declared (noted by the original author). */
          public Person() {
          }

          /**
           * Creates a fully populated record.
           *
           * @param id        record id
           * @param name      person name
           * @param age       person age
           * @param address   person address
           * @param eventTime event time value of the record
           */
          public Person(long id, String name, int age, String address, long eventTime) {
              this.id = id;
              this.name = name;
              this.age = age;
              this.address = address;
              this.eventTime = eventTime;
          }

          /**
           * Factory method that parses one comma-separated line into a {@link Person}.
           *
           * <p>The first five fields are read as {@code id,name,age,address,eventTime}; any
           * additional fields are ignored.
           *
           * @param line comma-separated record
           * @return the parsed {@link Person}
           * @throws NullPointerException     if {@code line} is {@code null}
           * @throws IllegalArgumentException if the line has fewer than five fields
           * @throws NumberFormatException    if {@code id}, {@code age} or {@code eventTime}
           *                                  is not a valid number
           */
          public static Person parseCSV(String line) {
              if (line == null) {
                  throw new NullPointerException("line must not be null");
              }
              final int requiredFields = 5;
              String[] fields = line.split(",");
              // Fail with a descriptive message instead of a bare ArrayIndexOutOfBoundsException
              if (fields.length < requiredFields) {
                  throw new IllegalArgumentException(
                          "Expected at least " + requiredFields
                                  + " comma-separated fields (id,name,age,address,eventTime) but got "
                                  + fields.length + ": " + line);
              }
              long id = Long.parseLong(fields[0]);
              String name = fields[1];
              int age = Integer.parseInt(fields[2]);
              String address = fields[3];
              long eventTime = Long.parseLong(fields[4]);

              return new Person(id, name, age, address, eventTime);
          }

      }
      
    • 处理代码
      // POJO-based DataStream conversion (requires the Person POJO class):
      // every CSV line is parsed into a Person object
      DataStream<Person> personDS = customDS.map(Person::parseCSV);

      // Convert to a Table, keep ages strictly between 20 and 25, project three columns
      Table personTable = tableEnv.fromDataStream(personDS);
      Table filteredTable = personTable.filter("age > 20 && age < 25");
      Table table = filteredTable.select("name, age, address");

      // Back to an append-only DataStream of rows, printed to stdout
      DataStream<Row> resultDS = tableEnv.toAppendStream(table, Row.class);
      resultDS.print();
      
  • 使用SQL处理DataStream
    // Parse every CSV line into a Person
    DataStream<Person> personDS = customDS.map(Person::parseCSV);
    // Register personDS as a table named tb_person
    tableEnv.registerDataStream("tb_person", personDS);
    // Author's note: in SQL the conjunction must be written as AND; && cannot be used here
    String query = "SELECT name, age, address FROM tb_person WHERE age > '20' AND age < '25'";
    Table table = tableEnv.sqlQuery(query);
    // Print the query result as an append-only stream of rows
    DataStream<Row> resultDS = tableEnv.toAppendStream(table, Row.class);
    resultDS.print();
    

TableAPI、SQL 窗口聚合示例

  • 代码 WindowAggDemo
    /**
     * Table API window aggregation example: groups {@code Person} records into 5-second
     * tumbling windows on a processing-time attribute and prints the average age per address.
     */
    public class WindowAggDemo {

        public static void main(String[] args) {
            // Streaming environment with parallelism 3, plus the table environment on top of it
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            env.setParallelism(3);
            StreamTableEnvironment tableEnv = StreamTableEnvironment.getTableEnvironment(env);

            // Custom test source emitting CSV lines
            DataStreamSource<String> customDS = env.addSource(new CustomSourceFunction());

            // Parse every line into a Person object
            DataStream<Person> personDS = customDS.map(Person::parseCSV);

            // Convert the DataStream into a Table.
            // UserActionTime.proctime declares a processing-time attribute;
            // UserActionTime.rowtime would declare an event-time attribute instead.
            Table srcTable = tableEnv.fromDataStream(
                    personDS,
                    "id, name, age, address, eventTime, UserActionTime.proctime");

            // Tumbling window of 5 seconds over UserActionTime, aliased as myWindow
            Window tumblingWindow = Tumble.over("5.seconds").on("UserActionTime").as("myWindow");

            // Window + aggregation: the first grouping key is the window alias,
            // and age.avg computes the average age
            Table aggregated = srcTable
                    .window(tumblingWindow)
                    .groupBy("myWindow, address")
                    .select("address, age.avg");

            // Emit as a retract stream and keep only the records flagged true (additions)
            tableEnv.toRetractStream(aggregated, Row.class)
                    .filter(change -> change.f0)
                    .print();

            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

    }
    
发布了146 篇原创文章 · 获赞 54 · 访问量 17万+

猜你喜欢

转载自blog.csdn.net/alionsss/article/details/104282584