sparkSQL 自定义UDF函数

package sparksql.day01

import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object sparkUdf {
  /**
   * Demonstrates registering and using a custom UDF with Spark SQL.
   *
   * Reads user records from `data/user.json` (one JSON object per line),
   * registers a UDF named "addname" that prefixes each username with the
   * literal string "name", and prints the transformed column.
   */
  def main(args: Array[String]): Unit = {
    // Silence Spark's verbose INFO logging so only errors reach the console.
    Logger.getLogger("org").setLevel(Level.ERROR)
    // Point Hadoop at the local winutils directory (Windows-only requirement).
    System.setProperty("hadoop.home.dir", "D:\\spark")

    val conf = new SparkConf().setAppName("spakrsql").setMaster("local[*]")
    val spark = SparkSession.builder().config(conf).getOrCreate()
    import spark.implicits._

    // Ensure the session is always stopped, even if the job throws,
    // so local executor resources are released.
    try {
      val df = spark.read.json("data/user.json")
      //df.show()

      df.createOrReplaceTempView("user")
      // First argument: the name visible in SQL; second: the function itself.
      spark.udf.register("addname", (name: String) => s"name$name")
      spark.sql("select addname(username) from user").show()
    } finally {
      spark.stop()
    }
  }

}

user.json数据为:

{"username":"zhangsan001","age":30}
{"username":"zhangsan002","age":31}
{"username":"zhangsan003","age":32}
{"username":"zhangsan004","age":33}

在自定义udf函数的时候,需要使用spark.udf.register去注册定义的udf函数,其中第一个参数为udf函数在SQL中使用的名称,第二个参数为该函数的具体实现(一个接收输入并返回结果的函数/lambda)

("addname",(name:String)=>{
      "name"+name//执行具体的功能
    })

猜你喜欢

转载自blog.csdn.net/weixin_38638777/article/details/114460462