spark篇3:spark操作ftp

spark篇3:spark操作ftp

废话不多说,直接上干货

package com.iflytek.ftp

import java.text.SimpleDateFormat
import java.util._

import com.alibaba.fastjson.JSON
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
 * Example Spark job: read all JSON files from an FTP directory, extract two
 * fields (`id`, `name`) from the nested `body.body` object of each document,
 * and show a filtered sample as a DataFrame.
 *
 * NOTE(review): the FTP credentials are embedded in clear text in the URL —
 * move them to configuration / secret storage before real use.
 */
object spark2ftp {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession.builder()
      .master("local") // local mode for the demo; parameterize for a cluster
      .appName("appName")
      .config("spark.testing.memory", "471859200")
      .getOrCreate()
    try {
      sparkSession.sql(s"use carbondata")

      // FTP source directory; wholeTextFiles reads every file under it whole,
      // yielding (filePath, fileContent) pairs — keep only the content.
      val dataSource = "ftp://账号:密码@ip:端口/目录/"
      val ftpInput: RDD[(String, String)] = sparkSession.sparkContext.wholeTextFiles(dataSource)
      val value: RDD[String] = ftpInput.map(_._2)

      // Each file is one JSON document; pull id/name out of body.body.
      // NOTE(review): fastjson's get() returns null for missing keys, so
      // .toString would NPE on malformed input — confirm inputs are trusted.
      val xq_sb: RDD[(String, String)] = value.map { json =>
        val root = JSON.parseObject(json)
        val inner = root.getJSONObject("body").getJSONObject("body")
        (inner.get("id").toString, inner.get("name").toString)
      }

      // Required Encoder for Dataset[(String, String)]; the original code
      // omitted this import and would not compile.
      import sparkSession.implicits._
      val ds: Dataset[(String, String)] = sparkSession.createDataset(xq_sb)
      val frame: DataFrame = ds.toDF("sbmc", "xqbm")
      // Show at most 2 matching rows. The original no-op predicate
      // `where("1=1 and 2=2")` was dropped; the result is unchanged.
      frame.filter("sbmc like '%名称%'").limit(2).select("sbmc").show()
    } finally {
      // Release Spark resources even if the job fails.
      sparkSession.stop()
    }
  }
}
发布了11 篇原创文章 · 获赞 4 · 访问量 690

猜你喜欢

转载自blog.csdn.net/qq_38740498/article/details/103474395