Spark DataFrame 转成 Dataset

package com.immooc.spark


import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Encoders, SparkSession}

/** Small demo that reads a JSON file into a DataFrame and converts it to a typed Dataset. */
object DataSetTest {

  /** Record type that each JSON row is mapped onto by `df.as[Person]`. */
  final case class Person(name: String, age: Long)

  /** Input file used when no path is passed on the command line. */
  private val DefaultPeopleJson: String =
    "file:///usr/local/Cellar/spark-2.3.0/examples/src/main/resources/people.json"

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` is the path of the JSON file to read.
   *             When absent, [[DefaultPeopleJson]] is used.
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultPeopleJson)

    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("DataSetTest")

    // Build the session straight from the SparkConf instead of creating a
    // separate, otherwise unused SparkContext first. Master and app name now
    // come from a single place.
    val spark = SparkSession
      .builder()
      .config(sparkConf)
      .config("spark.some.config.option", "some-value")
      .getOrCreate()

    try {
      // Brings the implicit Encoders needed by `as[Person]` and `map` into scope.
      import spark.implicits._

      val df = spark.read.json(inputPath)

      // DataFrame (untyped rows) -> Dataset[Person] (typed records).
      // NOTE(review): `age` is a primitive Long; if any input record has no
      // `age` field, the typed `map` below is expected to fail at runtime.
      // Consider `Option[Long]` if the data can contain missing ages -- confirm
      // against the actual input file.
      val peopleDS = df.as[Person]

      peopleDS.map(person => (person.name, person.age)).show()
    } finally {
      // Always release the session, even when reading or showing fails.
      spark.close()
    }
  }
}

Dataset 的主要特点是强类型：与 DataFrame（即 Dataset[Row]）相比，它把每一行映射为具体的类型（如上面的 Person），字段名或类型写错时在编译期就能发现。

http://www.waitingfy.com/archives/4306

猜你喜欢

转载自blog.csdn.net/fox64194167/article/details/80737500