Spark 的 UDF(User-Defined Function,用户自定义函数)使用起来很简单。
下面分别给出在 spark-shell 中的用法,以及在 IDEA 中编写代码的用法。
scala> val df = spark.read.json("/root/a.json")
df: org.apache.spark.sql.DataFrame = [age: bigint, name: string]
scala> df.show()
+----+-------+
| age|   name|
+----+-------+
|null|Michael|
|  30|   Andy|
|  19| Justin|
+----+-------+
//类似函数声明
scala> spark.udf.register("addName", (x:String)=> "Name:"+x)
res5: org.apache.spark.sql.expressions.UserDefinedFunction = UserDefinedFunction(<function1>,StringType,Some(List(StringType)))
scala> df.createOrReplaceTempView("people")
scala> spark.sql("Select addName(name), age from people").show()
+-----------------+----+
|UDF:addName(name)| age|
+-----------------+----+
|     Name:Michael|null|
|        Name:Andy|  30|
|      Name:Justin|  19|
+-----------------+----+
代码实现
/**
 * Demonstrates registering a Spark SQL UDF and calling it from a SQL query.
 *
 * `SparkConf`, `SparkSession` and `DataFrame` must already be in scope in the
 * enclosing file; this object does not import them itself.
 */
object SparkSql05_UDF {

  /**
   * Reads JSON from `./json`, exposes it as the temporary view `user`,
   * registers the `myName` UDF and shows the result of applying it to the
   * `name` column.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): the application name is an empty string; consider a descriptive one.
    val conf = new SparkConf().setMaster("local[*]").setAppName("")
    val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()

    // Stop the session even when reading or querying throws, so a failure
    // does not skip the shutdown call.
    try {
      // Build the DataFrame from the JSON input.
      val df1: DataFrame = spark.read.json("./json")
      df1.show()

      // Create the temporary view that the SQL below queries.
      df1.createOrReplaceTempView("user")

      // Define the UDF: prefixes its string argument with "name:".
      spark.udf.register("myName", (x: String) => {
        "name:" + x
      })

      // Use the UDF from SQL.
      val df2 = spark.sql("select myName(name) from user")

      // Display the result.
      df2.show()
    } finally {
      spark.stop()
    }
  }
}