Spark: Writing Data to Hive
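This post walks through a small end-to-end example: read a comma-separated text file with Spark, map each line to a case class, and append the result to a Hive table with saveAsTable. The Maven POM used to build the example follows the code.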


import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

object TestSparkSQLHive {

  def main(args: Array[String]): Unit = {
    val warehouseLocation = "/user/hive/warehouse"
    val spark = SparkSession
      .builder()
      .appName("Spark Hive Example")
      .master("local") // point this at a cluster instead, e.g. "spark://10.176.1.158:7077"
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .enableHiveSupport()
      .getOrCreate()

    // Sanity check: read an existing Hive table.
    val df = spark.sql("select * from test.jsonphone")
    df.show()

    val tableName = "test.parquetphone2"
    val path = "file:///C:/Users/qijzhou/TestData/spark-shell/phone.txt"
    // or an HDFS path such as "/zqj/phone.txt"

    // textFile already yields one record per line, so no extra split on "\n" is needed.
    val lines: RDD[String] = spark.sparkContext.textFile(path)
    // Parse each comma-separated line into a Phone and let Spark derive the
    // DataFrame schema from the case class.
    val recordsDF = spark.createDataFrame(lines.map { w =>
      val lineSplit = w.split(",")
      Phone(lineSplit(0).toInt, lineSplit(1), lineSplit(2).toInt, lineSplit(3))
    })
    recordsDF.show()
    recordsDF.createOrReplaceTempView("yahoo_stocks_temp")
    val results:DataFrame = spark.sql("SELECT * FROM yahoo_stocks_temp")
    // Append the rows to a Hive table stored as JSON.
    results.write.format("json").mode(SaveMode.Append).saveAsTable("test.jsonphone")
    recordsDF.printSchema()
    // To store the same data as Parquet instead:
    // recordsDF.write.format("parquet").mode(SaveMode.Append).saveAsTable(tableName)

    // Alternative: build a per-row INSERT statement with insertString below and run
    // it through spark.sql. That executes one SQL statement per row on the driver,
    // so it is only practical for small data. A single row can also be inserted
    // directly:
    // spark.sql("insert into test.phone values(5,\"tom\",25,\"131888881111\")")
    // Read the table back to confirm the append.
    val df1 = spark.sql("select * from test.jsonphone")
    df1.show()
  }


  // Builds an INSERT statement for one Phone row; string interpolation replaces
  // the original manual concatenation, and the value no longer shadows the String type.
  def insertString(table: String, phone: Phone): String = {
    val stmt = s"""insert into $table values(${phone.id},"${phone.name}",${phone.age},"${phone.tel}")"""
    println(stmt)
    stmt
  }

  case class Phone(id: Int, name: String, age: Int, tel: String)
}
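
A more compact variant lets Spark's built-in CSV reader do the parsing instead of splitting lines by hand. The following is a minimal sketch under the same assumptions as above (a four-column comma-separated phone.txt and a reachable Hive metastore); the target table name test.csvphone is illustrative, not from the original post:

import org.apache.spark.sql.{SaveMode, SparkSession}

object CsvToHive {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("CSV to Hive")
      .master("local")
      .enableHiveSupport()
      .getOrCreate()

    // Read the comma-separated file directly into a DataFrame and name the columns.
    val phones = spark.read
      .option("inferSchema", "true") // derive Int columns for id and age from the data
      .csv("file:///C:/Users/qijzhou/TestData/spark-shell/phone.txt")
      .toDF("id", "name", "age", "tel")

    // saveAsTable creates the Hive table on first use and appends afterwards.
    phones.write.mode(SaveMode.Append).saveAsTable("test.csvphone")

    spark.stop()
  }
}

The Maven POM used to build and package the example: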
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.paypal.streamingproject</groupId>
    <artifactId>streamingproject</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- Spark 2.3.x requires Java 8+ -->
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
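
            <!-- The example class above is Scala, which maven-compiler-plugin alone
                 will not compile; a Scala compiler plugin is needed. A minimal sketch
                 using the widely used scala-maven-plugin (the version shown is an
                 assumption, not from the original post). -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>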

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.6</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>TestSparkSQLHive</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>


    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
    </dependencies>

</project>
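
With the assembly plugin configured as above, a build-and-submit sketch (the jar name follows from the artifactId and version in this POM; adjust --master for a real cluster):

mvn package
spark-submit --class TestSparkSQLHive --master local target/streamingproject-1.0-SNAPSHOT-jar-with-dependencies.jar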

Reposted from blog.csdn.net/z471365897/article/details/81103368