1. Map operator (four ways to implement it: anonymous function, anonymous class, custom function class, rich function)
package com.qu.udf
import com.qu.source.SensorReading
import org.apache.flink.api.common.functions.{IterationRuntimeContext, MapFunction, RichMapFunction, RuntimeContext}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
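// UDF: a user-defined data-processing function.
// When applying map, filter, flatMap, etc. to the data, you can pass in any of the following:
// 1. an anonymous function; 2. an anonymous class (MapFunction, FilterFunction, FlatMapFunction); 3. a class of your own that implements MapFunction, FilterFunction or FlatMapFunction;
// 4. a rich function (RichMapFunction, RichFilterFunction, RichFlatMapFunction). Rich functions add lifecycle and runtime-context methods, so extra work can be done before and after processing.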
/**
 * Demonstrates four ways of supplying the function of a `map` transformation:
 * an anonymous function, an anonymous `MapFunction` class, a named
 * `MapFunction` class and a `RichMapFunction`.
 *
 * Every variant turns one comma-separated text line into a `SensorReading`
 * built from field 0 (as-is), field 1 (as Long) and field 2 (as Double).
 */
object MapUDFTest {

  /**
   * Builds and runs the demo job.
   *
   * @param args optional; `args(0)` overrides the input file path. When absent,
   *             the original sample file path is used.
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Read a local text file; the path can be overridden from the command line
    // so the job is not tied to one developer's machine.
    val inputPath = args.headOption
      .getOrElse("/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")
    val inputDataSet = env.readTextFile(inputPath)

    // map: 1. anonymous function
    val map1 = inputDataSet.map { line =>
      // Split once instead of once per field.
      val fields = line.split(",")
      SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    }
    map1.print("udf map 匿名函数:")

    // map: 2. anonymous class
    val map2 = inputDataSet.map {
      new MapFunction[String, SensorReading] {
        override def map(line: String): SensorReading = {
          val fields = line.split(",")
          SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
        }
      }
    }
    map2.print("udf map MapFunction匿名类:")

    // map: 3. custom MapFunction class
    val map3 = inputDataSet.map(new MyMapFunction())
    map3.print("udf map 自定义MapFunction:")

    // map: 4. RichMapFunction. The map implementation is the same, but the rich
    // variant also inherits lifecycle and context methods (open, close,
    // getRuntimeContext).
    val map4 = inputDataSet.map(new MyRichMapFunction())
    map4.print("udf map 自定义富函数RichMapFunction:")

    env.execute("udf test")
  }
}
/**
 * Named `MapFunction` that parses one comma-separated line into a
 * `SensorReading`.
 */
class MyMapFunction extends MapFunction[String, SensorReading] {

  /**
   * @param line comma-separated record; fields 0, 1 and 2 are read
   * @return a `SensorReading` built from field 0 (as-is), field 1 (as Long)
   *         and field 2 (as Double)
   */
  override def map(line: String): SensorReading = {
    // Split once per record instead of once per field.
    val fields = line.split(",")
    SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
  }
}
/**
 * `RichMapFunction` variant of the line-to-`SensorReading` parser.
 *
 * The mapping logic is the same as in the plain `MapFunction`; the remaining
 * overrides only delegate to the parent class and are here to show which
 * lifecycle and context hooks a rich function exposes.
 */
class MyRichMapFunction extends RichMapFunction[String, SensorReading] {

  /**
   * @param line comma-separated record; fields 0, 1 and 2 are read
   * @return a `SensorReading` built from field 0 (as-is), field 1 (as Long)
   *         and field 2 (as Double)
   */
  override def map(line: String): SensorReading = {
    // Split once per record instead of once per field.
    val fields = line.split(",")
    SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
  }

  // Pass-through: delegates to the parent implementation unchanged.
  override def setRuntimeContext(t: RuntimeContext): Unit = {
    super.setRuntimeContext(t)
  }

  // Pass-through: returns whatever the parent implementation returns.
  override def getRuntimeContext: RuntimeContext = super.getRuntimeContext

  // Pass-through: returns whatever the parent implementation returns.
  override def getIterationRuntimeContext: IterationRuntimeContext = super.getIterationRuntimeContext

  override def open(parameters: Configuration): Unit = {
    super.open(parameters)
    // Work that should happen before the function starts processing can go here.
  }

  override def close(): Unit = {
    super.close()
    // Work that should happen after the function finishes processing can go here.
  }
}
2. FlatMap operator (four ways to implement it: anonymous function, anonymous class, custom function class, rich function)
package com.qu.udf
import com.qu.source.SensorReading
import org.apache.flink.api.common.functions.{FlatMapFunction, MapFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.util.Collector
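// UDF: a user-defined data-processing function.
// When applying map, filter, flatMap, etc. to the data, you can pass in any of the following:
// 1. an anonymous function; 2. an anonymous class (MapFunction, FilterFunction, FlatMapFunction); 3. a class of your own that implements MapFunction, FilterFunction or FlatMapFunction;
// 4. a rich function (RichMapFunction, RichFilterFunction, RichFlatMapFunction). Rich functions add lifecycle and runtime-context methods, so extra work can be done before and after processing.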
/**
 * Demonstrates ways of supplying the function of a `flatMap` transformation:
 * an anonymous function, an anonymous `FlatMapFunction` class and a named
 * `FlatMapFunction` class. Each is shown producing either a
 * `(String, String)` tuple of fields 0 and 2, or a `SensorReading`.
 */
object FlatMapUDFTest {

  /**
   * Builds and runs the demo job.
   *
   * @param args optional; `args(0)` overrides the input file path. When absent,
   *             the original sample file path is used.
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Read a local text file; the path can be overridden from the command line
    // so the job is not tied to one developer's machine.
    val inputPath = args.headOption
      .getOrElse("/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")
    val inputDataSet = env.readTextFile(inputPath)

    // flatMap: 1. anonymous function
    // Emits exactly one (field 0, field 2) tuple per line. Note this is not
    // the same as `_.split(",")`, which would emit every token separately.
    val flatMap11 = inputDataSet.flatMap { line =>
      val fields = line.split(",")
      List((fields(0), fields(2)))
    }
    val flatMap12 = inputDataSet.flatMap { line =>
      val fields = line.split(",")
      List(SensorReading(fields(0), fields(1).toLong, fields(2).toDouble))
    }
    flatMap11.print("udf flatMap 匿名函数(转成Touple):")
    flatMap12.print("udf flatMap 匿名函数(转成对象):")

    // flatMap: 2. anonymous class
    val flatMap21 = inputDataSet.flatMap {
      new FlatMapFunction[String, (String, String)] {
        override def flatMap(line: String, out: Collector[(String, String)]): Unit = {
          val fields = line.split(",")
          out.collect((fields(0), fields(2)))
        }
      }
    }
    flatMap21.print("udf flatMap FlatMapFunction匿名类(转成Touple):")

    val flatMap22 = inputDataSet.flatMap {
      new FlatMapFunction[String, SensorReading] {
        override def flatMap(line: String, out: Collector[SensorReading]): Unit = {
          val fields = line.split(",")
          out.collect(SensorReading(fields(0), fields(1).toLong, fields(2).toDouble))
        }
      }
    }
    flatMap22.print("udf flatMap FlatMapFunction匿名类(转成对象):")

    // flatMap: 3. custom FlatMapFunction class
    val flatMap3 = inputDataSet.flatMap(new MyFlatMapFunction())
    flatMap3.print("udf flatMap 自定义FlatMapFunction:")

    // flatMap: 4. rich function omitted here (already shown for map)
    env.execute("udf test")
  }
}
/**
 * Named `FlatMapFunction` that emits one `SensorReading` for each
 * comma-separated input line.
 */
class MyFlatMapFunction extends FlatMapFunction[String, SensorReading] {

  /**
   * @param line comma-separated record; fields 0, 1 and 2 are read
   * @param out  collector that receives the single parsed reading
   */
  override def flatMap(line: String, out: Collector[SensorReading]): Unit = {
    val fields = line.split(",")
    // Field 0 is passed as-is, field 1 as Long, field 2 as Double.
    val reading = SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    out.collect(reading)
  }
}
3. Filter operator (four ways to implement it: anonymous function, anonymous class, custom function class, rich function)
package com.qu.udf
import org.apache.flink.api.common.functions.{FilterFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
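// UDF: a user-defined data-processing function.
// When applying map, filter, flatMap, etc. to the data, you can pass in any of the following:
// 1. an anonymous function; 2. an anonymous class (MapFunction, FilterFunction, FlatMapFunction); 3. a class of your own that implements MapFunction, FilterFunction or FlatMapFunction;
// 4. a rich function (RichMapFunction, RichFilterFunction, RichFlatMapFunction). Rich functions add lifecycle and runtime-context methods, so extra work can be done before and after processing.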
/**
 * Demonstrates ways of supplying the predicate of a `filter` transformation:
 * an anonymous function, an anonymous `FilterFunction` class and a named
 * `FilterFunction` class.
 *
 * Every variant keeps the lines whose third comma-separated field, parsed as a
 * Double, is greater than 30.
 */
object FilterUDFTest {

  /**
   * Builds and runs the demo job.
   *
   * @param args optional; `args(0)` overrides the input file path. When absent,
   *             the original sample file path is used.
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Read a local text file; the path can be overridden from the command line
    // so the job is not tied to one developer's machine.
    val inputPath = args.headOption
      .getOrElse("/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")
    val inputDataSet = env.readTextFile(inputPath)

    // filter: 1. anonymous function
    val filter1 = inputDataSet.filter(_.split(",")(2).toDouble > 30)
    filter1.print("udf filter 匿名函数:")

    // filter: 2. anonymous class
    val filter2 = inputDataSet.filter {
      new FilterFunction[String] {
        override def filter(line: String): Boolean = {
          line.split(",")(2).toDouble > 30
        }
      }
    }
    filter2.print("udf filter FilterFunction匿名类:")

    // filter: 3. custom FilterFunction class
    val filter3 = inputDataSet.filter(new MyFilterMapFunction())
    filter3.print("udf filter 自定义FilterMapFunction:")

    // filter: 4. rich function omitted here (already shown for map)
    env.execute("udf test")
  }
}
/**
 * Named `FilterFunction` that keeps the lines whose third comma-separated
 * field, parsed as a Double, is greater than 30.
 */
class MyFilterMapFunction extends FilterFunction[String] {

  /**
   * @param line comma-separated record; only field 2 is read
   * @return true when field 2, parsed as a Double, is greater than 30
   */
  override def filter(line: String): Boolean = {
    val fields = line.split(",")
    val value = fields(2).toDouble
    value > 30
  }
}