不带窗口的 inner join 相当于在全局窗口上做关联:两侧流中之前到达的消息都会一直保存在状态里;每当新来一条消息,只要它能与对侧已保存的消息关联上,就会输出这条新消息与对侧所有匹配消息的组合(笛卡尔积)。
package SQL;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Date;
import java.util.Random;
import java.util.concurrent.TimeUnit;
/**
 * Demo of a non-windowed streaming {@code INNER JOIN} expressed in Flink SQL.
 *
 * <p>Two sources each emit one random {@link Order} per second. Both streams are registered as
 * tables ({@code tableA}, {@code tableB}), joined on {@code name}, and the join result is printed
 * as a retract stream of {@link Row}s.
 *
 * <p>The job uses the old planner in streaming mode and configures no state retention for the
 * query, so rows received earlier remain joinable: every newly arriving order is paired with all
 * orders of the same name seen so far on the other side.
 *
 * <p>Created 2021/1/6 15:22.
 *
 * @author you guess
 * @version 1.0
 */
public class DataStreamSql1_Join {

    /** Logger for this class (the sources themselves report via {@code System.out}). */
    private static final Logger LOG = LoggerFactory.getLogger(DataStreamSql1_Join.class);

    /** Product names an order can be drawn from; also the join key values. */
    private static final String[] TYPE = {"a苹果", "b梨", "c西瓜", "d葡萄", "e火龙果"};

    /**
     * Builds and runs the join job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if building or executing the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings envSettings =
                EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build();
        StreamTableEnvironment stEnv = StreamTableEnvironment.create(env, envSettings);

        // Custom sources: each emits one order {product name, quantity} per second.
        DataStreamSource<Order> orderSourceA =
                env.addSource(new RandomOrderSource("orderSourceA"), "order-infoA");
        DataStreamSource<Order> orderSourceB =
                env.addSource(new RandomOrderSource("orderSourceB"), "order-infoB");

        stEnv.registerDataStream("tableA", orderSourceA);
        stEnv.registerDataStream("tableB", orderSourceB);

        Table result = stEnv.sqlQuery(
                "SELECT A.name,A.qtty,B.qtty from tableA A inner join tableB B on A.name = B.name");

        // Original author's note: the result must be converted to Row here.
        stEnv.toRetractStream(result, Row.class).print();

        env.execute("Flink Streaming Java API Skeleton");
    }

    /**
     * Source that emits one random {@link Order} per second until cancelled, printing every
     * emitted order to standard output prefixed with the current time and the source label.
     */
    private static final class RandomOrderSource implements SourceFunction<Order> {

        private static final long serialVersionUID = 1L;

        /** Name printed in front of each emitted order, e.g. {@code orderSourceA}. */
        private final String label;

        private final Random random = new Random();

        /** Cleared by {@link #cancel()}; volatile because it is read in the {@link #run} loop. */
        private volatile boolean isRunning = true;

        /**
         * @param label name used to identify this source in the printed output
         */
        RandomOrderSource(String label) {
            this.label = label;
        }

        /**
         * Emits a random order every second while the source is running.
         *
         * @param ctx context used to emit the generated orders
         * @throws Exception if the one-second sleep is interrupted
         */
        @Override
        public void run(SourceContext<Order> ctx) throws Exception {
            while (isRunning) {
                TimeUnit.SECONDS.sleep(1);
                // Random product name and a quantity in [0, 10).
                Order order = new Order(
                        TYPE[random.nextInt(TYPE.length)], Long.valueOf(random.nextInt(10)));
                System.out.println(new Date() + "," + label + "提交元素:" + order);
                ctx.collect(order);
            }
        }

        /** Asks the emit loop in {@link #run} to stop after its current iteration. */
        @Override
        public void cancel() {
            isRunning = false;
        }
    }

    /**
     * Simple POJO describing one order.
     *
     * <p>NOTE(review): the public fields and the public no-arg constructor are presumably kept so
     * that Flink can map the fields to the table columns {@code name} and {@code qtty} - confirm
     * before changing them.
     */
    public static class Order {

        /** Product name; used as the join key in the SQL query. */
        public String name;

        /** Ordered quantity. */
        public Long qtty;

        /** Creates an order with both fields unset. */
        public Order() {
        }

        /**
         * @param name product name
         * @param qtty ordered quantity
         */
        public Order(String name, Long qtty) {
            this.name = name;
            this.qtty = qtty;
        }

        /** Returns a string of the form {@code Order{name='...', qtty=...}}. */
        @Override
        public String toString() {
            return "Order{" +
                    "name='" + name + '\'' +
                    ", qtty=" + qtty +
                    '}';
        }
    }
}
复制代码
Wed Jan 06 15:28:04 CST 2021,orderSourceA提交元素:Order{name='d葡萄', qtty=9}
Wed Jan 06 15:28:04 CST 2021,orderSourceB提交元素:Order{name='b梨', qtty=8}
Wed Jan 06 15:28:05 CST 2021,orderSourceA提交元素:Order{name='b梨', qtty=4}
Wed Jan 06 15:28:05 CST 2021,orderSourceB提交元素:Order{name='b梨', qtty=6}
5> (true,b梨,4,8)
5> (true,b梨,4,6)
Wed Jan 06 15:28:06 CST 2021,orderSourceA提交元素:Order{name='a苹果', qtty=3}
Wed Jan 06 15:28:06 CST 2021,orderSourceB提交元素:Order{name='e火龙果', qtty=1}
Wed Jan 06 15:28:07 CST 2021,orderSourceA提交元素:Order{name='e火龙果', qtty=4}
Wed Jan 06 15:28:07 CST 2021,orderSourceB提交元素:Order{name='a苹果', qtty=6}
9> (true,a苹果,3,6)

4> (true,e火龙果,4,1)
Wed Jan 06 15:28:08 CST 2021,orderSourceB提交元素:Order{name='e火龙果', qtty=9}
Wed Jan 06 15:28:08 CST 2021,orderSourceA提交元素:Order{name='e火龙果', qtty=6}
4> (true,e火龙果,4,9)
4> (true,e火龙果,6,1)
4> (true,e火龙果,6,9)
Wed Jan 06 15:28:09 CST 2021,orderSourceA提交元素:Order{name='e火龙果', qtty=1}
Wed Jan 06 15:28:09 CST 2021,orderSourceB提交元素:Order{name='e火龙果', qtty=2}
4> (true,e火龙果,4,2)
4> (true,e火龙果,6,2)
4> (true,e火龙果,1,1)
4> (true,e火龙果,1,2)
4> (true,e火龙果,1,9)
Wed Jan 06 15:28:10 CST 2021,orderSourceA提交元素:Order{name='c西瓜', qtty=5}
Wed Jan 06 15:28:10 CST 2021,orderSourceB提交元素:Order{name='e火龙果', qtty=6}
4> (true,e火龙果,4,6)
4> (true,e火龙果,6,6)
4> (true,e火龙果,1,6)
Wed Jan 06 15:28:11 CST 2021,orderSourceA提交元素:Order{name='c西瓜', qtty=6}
Wed Jan 06 15:28:11 CST 2021,orderSourceB提交元素:Order{name='c西瓜', qtty=0}
8> (true,c西瓜,5,0)
8> (true,c西瓜,6,0)
Wed Jan 06 15:28:12 CST 2021,orderSourceA提交元素:Order{name='b梨', qtty=2}
Wed Jan 06 15:28:12 CST 2021,orderSourceB提交元素:Order{name='d葡萄', qtty=7}//在此之前,orderSourceB 一直没有元素能与 15:28:04 时 orderSourceA 提交的 Order{name='d葡萄', qtty=9} 匹配,所以一直没有输出;直到现在才第一次匹配上!
3> (true,d葡萄,9,7)
5> (true,b梨,2,8)
5> (true,b梨,2,6)
Wed Jan 06 15:28:13 CST 2021,orderSourceB提交元素:Order{name='d葡萄', qtty=1}
Wed Jan 06 15:28:13 CST 2021,orderSourceA提交元素:Order{name='d葡萄', qtty=5}
3> (true,d葡萄,9,1)
3> (true,d葡萄,5,1)
3> (true,d葡萄,5,7)
Wed Jan 06 15:28:14 CST 2021,orderSourceB提交元素:Order{name='d葡萄', qtty=5}
Wed Jan 06 15:28:14 CST 2021,orderSourceA提交元素:Order{name='e火龙果', qtty=9}
3> (true,d葡萄,5,5)
3> (true,d葡萄,9,5)
4> (true,e火龙果,9,6)
4> (true,e火龙果,9,1)
4> (true,e火龙果,9,2)
4> (true,e火龙果,9,9)
Wed Jan 06 15:28:15 CST 2021,orderSourceB提交元素:Order{name='e火龙果', qtty=5} //每次新来一个元素,若能匹配上,都是输出笛卡尔积
Wed Jan 06 15:28:15 CST 2021,orderSourceA提交元素:Order{name='e火龙果', qtty=4} //每次新来一个元素,若能匹配上,都是输出笛卡尔积
4> (true,e火龙果,4,5)
4> (true,e火龙果,6,5)
4> (true,e火龙果,1,5)
4> (true,e火龙果,9,5)
4> (true,e火龙果,4,5)
4> (true,e火龙果,4,6)
4> (true,e火龙果,4,1)
4> (true,e火龙果,4,2)
4> (true,e火龙果,4,9)
不同 Flink 版本的 API 写法差别挺大,本示例使用的版本如下:
<flink.version>1.9.2</flink.version>
复制代码
jdk1.8