在前面的文章中,博主分享了flink的四大assigner,有需要的发烧友点击链接https://blog.csdn.net/qq_44962429/article/details/112912432查询,欢迎指出问题,交流分享!
本文主要介绍flink的window functions
window functions
当系统认定窗口就绪之后,会调用Window Functions对窗口实现聚合计算。常见的Window Functions有以下形式:ReduceFunction、AggregateFunction、FoldFunction、ProcessWindowFunction,以及旧版(古董)的WindowFunction。
(1)ReduceFunction
/**
 * Combines two `(word, count)` records into one.
 *
 * The word is taken from the first record and the two counts are added.
 */
class SumReduceFunction extends ReduceFunction[(String, Int)] {
  override def reduce(v1: (String, Int), v2: (String, Int)): (String, Int) = {
    val (word, countSoFar) = v1
    (word, countSoFar + v2._2)
  }
}
// Word count over 5-second tumbling processing-time windows, aggregated with SumReduceFunction.
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 9999)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  // Key by the word through a selector; positional keyBy(0) is deprecated and produces a Tuple key.
  .keyBy(_._1)
  .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
  .reduce(new SumReduceFunction) // lambda alternative: .reduce((v1, v2) => (v1._1, v1._2 + v2._2))
  .print()
env.execute("window")
(2) AggregateFunction
/**
 * Sums `(word, count)` records using a `(word, count)` accumulator.
 *
 * Input, accumulator and result all share the same tuple type.
 */
class SumAggregateFunction
    extends AggregateFunction[(String, Int), (String, Int), (String, Int)] {

  /** Starts from an empty word and a zero count. */
  override def createAccumulator(): (String, Int) = ("", 0)

  /** Folds one record in: adopts the record's word and adds its count to the accumulator. */
  override def add(value: (String, Int), accumulator: (String, Int)): (String, Int) = {
    val (word, count) = value
    (word, accumulator._2 + count)
  }

  /** Combines two partial accumulators, keeping the word of `a`. */
  override def merge(a: (String, Int), b: (String, Int)): (String, Int) =
    (a._1, a._2 + b._2)

  /** The accumulator is returned unchanged as the result. */
  override def getResult(accumulator: (String, Int)): (String, Int) = accumulator
}
// Word count over 5-second tumbling processing-time windows, aggregated with SumAggregateFunction.
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("CentOS", 9999)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  // Key by the word through a selector; positional keyBy(0) is deprecated and produces a Tuple key.
  .keyBy(_._1)
  .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
  .aggregate(new SumAggregateFunction)
  .print()
env.execute("window")
(3) FoldFunction
/**
 * Folds `(word, count)` records into a `(word, total)` accumulator with a `Long` total.
 *
 * Each step adopts the incoming record's word and adds its count to the running total.
 */
class SumFoldFunction extends FoldFunction[(String, Int), (String, Long)] {
  override def fold(accumulator: (String, Long), value: (String, Int)): (String, Long) = {
    val (word, count) = value
    (word, accumulator._2 + count)
  }
}
// Word count over 1-second tumbling processing-time windows, aggregated with a fold.
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 8877)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  // Key by the word through a selector; positional keyBy(0) is deprecated and produces a Tuple key.
  .keyBy(_._1)
  .window(TumblingProcessingTimeWindows.of(Time.seconds(1)))
  // Class-based alternative: .fold(("", 0L), new SumFoldFunction)
  .fold(("", 0L))((acc, v) => (v._1, acc._2 + v._2))
  .print()
env.execute("window")
(4) ProcessWindowFunction
// Word count over sliding processing-time windows (4-second size, 2-second slide),
// computed by a ProcessWindowFunction that sees every element of the window at once.
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  .keyBy(_._1)
  .window(SlidingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2)))
  .process(new ProcessWindowFunction[(String, Int), (String, Int), String, TimeWindow] {
    /** Emits one `(key, total)` record: the sum of all counts buffered for this key and window. */
    override def process(key: String, context: Context,
                         elements: Iterable[(String, Int)],
                         out: Collector[(String, Int)]): Unit = {
      // Every element carries `key` as its word (the stream is keyed by _._1), so pairing
      // `key` with the summed counts matches a pairwise reduce without the partial `reduce` call.
      out.collect((key, elements.map(_._2).sum))
    }
  })
  .print()
env.execute("window")
ProcessWindowFunction可以获取到窗口有关的信息,如:窗口的起始时间、终止时间等等,还可以获取到 globalState() 和 windowState():
- globalState():允许访问不限于窗口的键控状态(同一个key的所有窗口共享)
- windowState():允许访问仅限于当前窗口的键控状态
// Word count that keeps two running totals per key: one scoped to the current window
// (windowState, emitted on the main output) and one shared by all windows of the key
// (globalState, emitted on the `globalTag` side output).
val env = StreamExecutionEnvironment.getExecutionEnvironment
val globalTag = new OutputTag[(String, Int)]("globalTag")
val countsStream = env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  .keyBy(_._1)
  // For tumbling windows the second argument is an offset, not a slide:
  // 4-second windows shifted by 2 seconds.
  .window(TumblingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2)))
  .process(new ProcessWindowFunction[(String, Int), (String, Int), String, TimeWindow] {
    // State descriptors are plain serializable values, so they are immutable fields
    // rather than null-initialised vars assigned later in open().
    private val windowTotalDescriptor =
      new ValueStateDescriptor[Int]("window-value", createTypeInformation[Int])
    private val globalTotalDescriptor =
      new ValueStateDescriptor[Int]("global-value", createTypeInformation[Int])

    /**
     * Adds this firing's count to both totals, emits the window total on the main output
     * and the global total on the side output, then stores both updated totals.
     */
    override def process(key: String, context: Context,
                         elements: Iterable[(String, Int)],
                         out: Collector[(String, Int)]): Unit = {
      val firingTotal = elements.map(_._2).sum
      val windowState = context.windowState.getState(windowTotalDescriptor)
      val globalState = context.globalState.getState(globalTotalDescriptor)
      // A state that was never written reads as 0 here (null unboxes to 0 for Int).
      val windowTotal = windowState.value() + firingTotal
      val globalTotal = globalState.value() + firingTotal
      out.collect((key, windowTotal))
      context.output(globalTag, (key, globalTotal))
      windowState.update(windowTotal)
      globalState.update(globalTotal)
    }

    /** Drops the per-window state when the window is purged so it does not accumulate. */
    override def clear(context: Context): Unit =
      context.windowState.getState(windowTotalDescriptor).clear()
  })
countsStream.print("窗口统计")
countsStream.getSideOutput(globalTag).print("全局输出")
env.execute("window")
注意:ProcessWindowFunction需要先缓存窗口内的所有元素,等窗口触发时再对每个元素循环遍历,这样效率很低。在实际生产中,还可以配合其他function(如ReduceFunction、AggregateFunction、FoldFunction)先做增量聚合,再一起使用
(5)ReduceFunction+ProcessWindowFunction
// Word count where SumReduceFunction pre-aggregates incrementally and the
// ProcessWindowFunction only post-processes the reduced result when the window fires.
val env = StreamExecutionEnvironment.getExecutionEnvironment
val countsStream = env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  .keyBy(_._1)
  // For tumbling windows the second argument is an offset, not a slide:
  // 4-second windows shifted by 2 seconds.
  .window(TumblingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2)))
  .reduce(
    new SumReduceFunction,
    new ProcessWindowFunction[(String, Int), (String, Int), String, TimeWindow] {
      /** Emits `(key, total)` for the window from the already reduced input. */
      override def process(key: String, context: Context,
                           elements: Iterable[(String, Int)],
                           out: Collector[(String, Int)]): Unit = {
        val total = elements.map(_._2).sum
        out.collect((key, total))
      }
    })
// No side output is read here: this function never emits to one, so the former
// `globalTag` sink could never print anything.
countsStream.print("窗口统计")
env.execute("window")
(5) FoldFunction+ProcessWindowFunction
// Word count where SumFoldFunction folds records into a (word, Long) accumulator and a
// ProcessWindowFunction emits the folded total when the window fires.
val env = StreamExecutionEnvironment.getExecutionEnvironment

// Window-level step: sums the folded accumulator values handed to it and emits (key, sum).
val emitFoldedTotal =
  new ProcessWindowFunction[(String, Long), (String, Long), String, TimeWindow] {
    override def process(key: String, context: Context,
                         elements: Iterable[(String, Long)],
                         out: Collector[(String, Long)]): Unit = {
      val foldedCounts = elements.map(_._2)
      out.collect((key, foldedCounts.sum))
    }
  }

val wordOnes = env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))

wordOnes
  .keyBy(_._1)
  .window(TumblingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2)))
  .fold(("", 0L), new SumFoldFunction, emitFoldedTotal)
  .print()
env.execute("window")
(6) WindowFunction(不常用)
一般用ProcessWindowFunction替代
// Word count over 1-second tumbling processing-time windows using the legacy WindowFunction.
// The environment is created here so the snippet does not rely on an `env` defined elsewhere.
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  // The WindowFunction below declares a String key, so key with a selector;
  // positional keyBy cannot be used here because it produces a Tuple key.
  .keyBy(_._1)
  .window(TumblingProcessingTimeWindows.of(Time.seconds(1)))
  .apply(new WindowFunction[(String, Int), (String, Int), String, TimeWindow] {
    /** Emits one `(key, total)` record: the sum of the counts of all window elements. */
    override def apply(key: String,
                       window: TimeWindow,
                       input: Iterable[(String, Int)],
                       out: Collector[(String, Int)]): Unit = {
      out.collect((key, input.map(_._2).sum))
    }
  })
  .print()
env.execute("window")