Spark SQL 中使用 DataFrame 和 Dataset 读取文本并实现 split

Spark SQL 中使用 DataFrame 和 Dataset 读取文本

  • Spark 读取文本的代码
package ml.test
import org.apache.spark.sql.functions.split
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
import org.slf4j.LoggerFactory

/**
 * Demo: read a comma-separated text file with Spark SQL and split every line
 * into `name`, `age` and `addr` columns.
 *
 * The file is read twice to show both APIs:
 *  - `spark.read.text`     returns a DataFrame (single column, renamed to "XXX" here)
 *  - `spark.read.textFile` returns a Dataset[String] (single column named "value")
 *
 * No line is filtered out before splitting, so a header line in the file is
 * split and printed like any other row.
 */
object TestSpark {

  private val logger = LoggerFactory.getLogger(TestSpark.getClass)

  // Default input file, used when no path is passed on the command line.
  private val path: String = "D:\\table.txt"

  /**
   * Runs the demo and prints each intermediate table with `show()`.
   *
   * @param args optional; when present, `args(0)` is used as the input file
   *             path instead of the built-in default
   */
  def main(args: Array[String]): Unit = {
    val inputPath: String = args.headOption.getOrElse(path)

    logger.info("创建spark连接")
    val spark: SparkSession = SparkSession.builder()
      .appName("WordCount")
      .master("local[*]")
      .getOrCreate()

    // Stop the session even if reading or showing fails, so the local Spark
    // context is not left running.
    try {
      // Reduce Spark's own log output to errors only.
      spark.sparkContext.setLogLevel("ERROR")

      // Read the text file as a DataFrame and rename its only column to "XXX".
      val frame: DataFrame = spark.read.text(inputPath).toDF("XXX")
      frame.show()

      // Brings the $"col" column syntax into scope.
      import spark.implicits._

      // Variant 1: split into a temporary array column, then project the three
      // items. The select does not include "_tmp", so no drop is needed.
      frame.withColumn("_tmp", split(frame.col("XXX"), ","))
        .select($"_tmp".getItem(0).as("name"),
          $"_tmp".getItem(1).as("age"),
          $"_tmp".getItem(2).as("addr"))
        .show()

      // Variant 2: append the three columns next to the original "XXX" column.
      // withColumn keeps "_tmp", so it has to be dropped explicitly.
      frame.withColumn("_tmp", split($"XXX", ","))
        .withColumn("name", $"_tmp".getItem(0))
        .withColumn("age", $"_tmp".getItem(1))
        .withColumn("addr", $"_tmp".getItem(2))
        .drop("_tmp")
        .show()

      println("=========================================")

      // Same file read as a Dataset[String]; its single column is named "value".
      val ds: Dataset[String] = spark.read.textFile(inputPath)
      ds.show()

      // Variant 3: same projection as variant 1, applied to the Dataset.
      ds.withColumn("_tmp", split($"value", ","))
        .select($"_tmp".getItem(0).as("name"),
          $"_tmp".getItem(1).as("age"),
          $"_tmp".getItem(2).as("addr"))
        .show()
    } finally {
      spark.stop()
    }
  }
}
  • 显示结果
+-------------------+
|                XXX|
+-------------------+
|      name,age,addr|
|       tom,17,china|
|    lili,18,america|
|zhangsan,100,canada|
+-------------------+

+--------+---+-------+
|    name|age|   addr|
+--------+---+-------+
|    name|age|   addr|
|     tom| 17|  china|
|    lili| 18|america|
|zhangsan|100| canada|
+--------+---+-------+

+-------------------+--------+---+-------+
|                XXX|    name|age|   addr|
+-------------------+--------+---+-------+
|      name,age,addr|    name|age|   addr|
|       tom,17,china|     tom| 17|  china|
|    lili,18,america|    lili| 18|america|
|zhangsan,100,canada|zhangsan|100| canada|
+-------------------+--------+---+-------+

=========================================
+-------------------+
|              value|
+-------------------+
|      name,age,addr|
|       tom,17,china|
|    lili,18,america|
|zhangsan,100,canada|
+-------------------+

+--------+---+-------+
|    name|age|   addr|
+--------+---+-------+
|    name|age|   addr|
|     tom| 17|  china|
|    lili| 18|america|
|zhangsan|100| canada|
+--------+---+-------+

猜你喜欢

转载自blog.csdn.net/godlovebinlee/article/details/85719360
今日推荐