1.创建SparkSession
// Build (or reuse) the SparkSession for this demo: application name "UDFApp",
// running against the local master "local[2]".
val spark = SparkSession
  .builder()
  .appName("UDFApp")
  .master("local[2]")
  .getOrCreate()
2. 使用SparkContext(sc)的方式读取数据
import spark.implicits._

// Row type for the two comma-separated fields read below.
// Declared before its first use: in a REPL/script or a local block, referencing
// Likes ahead of its definition is an undefined/forward reference.
case class Likes(name: String, hobbys: String)

// Read the file as plain text lines through the SparkContext, split every line on ",",
// map the first two fields onto Likes, then convert the RDD to a DataFrame.
// NOTE(review): fields(1) throws ArrayIndexOutOfBoundsException for a line without a comma,
// and the input is named *.json although it is parsed as comma-separated text — confirm
// the file really holds "name,hobbys" records.
val df1 = spark.sparkContext
  .textFile("spark-sql/data/nest.json")
  .map(_.split(","))
  .map(fields => Likes(fields(0), fields(1)))
  .toDF()
3.使用spark.read直接读取JSON数据
// Load the JSON file through the DataFrame reader.
val df = spark.read.json("spark-sql/data/nest.json")
// Expose the DataFrame to SQL under the name "access".
// createOrReplaceTempView is used instead of createTempView so that re-running this
// snippet in a session reused by getOrCreate() does not fail because the view already exists.
df.createOrReplaceTempView("access")
4.自定义函数实现功能
// Registers the SQL function "friens_length" and keeps the returned handle so the same
// function can also be applied through the DataFrame API.
//
// The function returns the number of elements in an array-of-strings column.
// - The parameter is Seq[String], not Array[String]: Spark passes ArrayType columns to
//   Scala UDFs as a Seq, so an Array parameter fails at runtime with a ClassCastException.
// - A null array yields 0 instead of raising a NullPointerException inside the job.
val UDFfriens_length: UserDefinedFunction =
  spark.udf.register("friens_length", (input: Seq[String]) => Option(input).fold(0)(_.size))
5.使用自定义函数
import spark.implicits._

// SQL variant: call the registered function by name on the "access" view.
// The placeholder column `xxx` is replaced by `friends.name`, the same column the
// DataFrame variant below passes, and the result is displayed instead of being discarded.
spark.sql("select name,age,gender,friens_length(friends.name) from access").show()

// DataFrame variant: apply the UserDefinedFunction handle returned by register.
df.select($"name", $"age", $"gender", UDFfriens_length($"friends.name")).show()