package sparksql.day01
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
/**
 * Demonstrates registering and using a Spark SQL UDF.
 *
 * Reads `data/user.json` into a DataFrame, exposes it as the temp view
 * `user`, registers a UDF named `addname` that prefixes the literal
 * string "name" to its argument, and applies it via a SQL query.
 */
object sparkUdf {
  def main(args: Array[String]): Unit = {
    // Silence Spark's verbose INFO/WARN logging for cleaner console output.
    Logger.getLogger("org").setLevel(Level.ERROR)
    // Required on Windows so Spark can locate winutils.exe — TODO confirm path.
    System.setProperty("hadoop.home.dir", "D:\\spark")

    val conf = new SparkConf().setAppName("sparksql").setMaster("local[*]")
    val spark = SparkSession.builder().config(conf).getOrCreate()

    // Each line of user.json is one JSON object: {"username": ..., "age": ...}
    val df = spark.read.json("data/user.json")
    df.createOrReplaceTempView("user")

    // Register the UDF: first arg is the SQL-visible name, second is the
    // function implementing it. Here it prepends the literal "name".
    spark.udf.register("addname", (name: String) => "name" + name)

    spark.sql("select addname(username) from user").show()

    // Release the SparkContext and associated resources.
    spark.stop()
  }
}
user.json数据为:
{"username":"zhangsan001","age":30}
{"username":"zhangsan002","age":31}
{"username":"zhangsan003","age":32}
{"username":"zhangsan004","age":33}
在自定义udf函数的时候,需要使用spark.udf.register去注册定义的udf函数,其中第一个参数为udf函数在SQL中使用的名称,第二个参数为实现该功能的函数本身(通常是一个匿名函数),该函数的参数列表即为udf的输入参数
("addname",(name:String)=>{
"name"+name//执行具体的功能
})