import java.io.File
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
/**
 * Demo job: reads a local CSV of phone records, converts it to a DataFrame
 * via a case class, and appends it to the Hive table `test.jsonphone` as JSON.
 * Requires a Hive metastore reachable from the local Spark session.
 */
object TestSparkSQLHive {

  def main(args: Array[String]): Unit = {
    // Hive warehouse directory applied to the session via spark.sql.warehouse.dir.
    val warehouseLocation = "/user/hive/warehouse"

    val spark = SparkSession
      .builder()
      .appName("Java Spark Hive Example")
      .master("local")
      .enableHiveSupport()
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .getOrCreate()

    // Show the target table's contents before the append, for comparison.
    spark.sql("select * from test.jsonphone").show()

    // FIX: Hadoop resolves the filesystem from the config key fs.<scheme>.impl,
    // and that lookup is case-sensitive — "File:///" fails with
    // "No FileSystem for scheme: File". The scheme must be lowercase.
    val inputPath = "file:///C:/Users/qijzhou/TestData/spark-shell/phone.txt"

    // One CSV record per line: id,name,age,tel
    val lines: RDD[String] = spark.sparkContext.textFile(inputPath).flatMap(_.split("\n"))

    // Parse each line into a Phone; createDataFrame derives the schema from
    // the case class fields via reflection.
    // NOTE(review): fields(0).toInt / fields(2).toInt will throw on malformed
    // rows — assumes the input file is clean; confirm or add validation.
    val recordsDF: DataFrame = spark.createDataFrame(lines.map { line =>
      val fields = line.split(",")
      Phone(fields(0).toInt, fields(1), fields(2).toInt, fields(3))
    })
    recordsDF.show()

    // Register a temp view and select it back, then append to the Hive table
    // in JSON format.
    recordsDF.createOrReplaceTempView("yahoo_stocks_temp")
    val results: DataFrame = spark.sql("SELECT * FROM yahoo_stocks_temp")
    results.write.format("json").mode(SaveMode.Append).saveAsTable("test.jsonphone")
    recordsDF.printSchema()

    // Show the table again to confirm the rows were appended.
    spark.sql("select * from test.jsonphone").show()
  }

  /**
   * Builds a Hive `INSERT INTO` statement for a single [[Phone]].
   *
   * NOTE(review): values are concatenated straight into the SQL text. That is
   * tolerable for trusted demo data, but for untrusted input prefer
   * DataFrameWriter-based inserts — string-built SQL is injectable.
   *
   * @param table fully-qualified table name, e.g. "test.phone"
   * @param phone the record to insert
   * @return the INSERT statement (also printed to stdout, as before)
   */
  def insertString(table: String, phone: Phone): String = {
    // Was `val String = ...`: that shadowed the String type name — renamed.
    val stmt = "insert into " + table + " values(" + phone.id + ",\"" + phone.name + "\"," + phone.age + ",\"" + phone.tel + "\")"
    println(stmt)
    stmt
  }

  /** One phone record: numeric id, owner name, age, and telephone number. */
  case class Phone(id: Int, name: String, age: Int, tel: String)
}
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.paypal.streamingproject</groupId>
    <artifactId>streamingproject</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- FIX: was 7/7. Spark 2.3.x requires Java 8+, so compiling
                         to the Java 7 level is inconsistent with the declared
                         Spark dependencies below. -->
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <!-- NOTE(review): only the Java compiler plugin is configured here;
                 if this module contains Scala sources, a scala-maven-plugin
                 execution is needed for them to be compiled — verify against
                 the parent/other build config. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.6</version>
                <configuration>
                    <archive>
                        <manifest>
                            <!-- NOTE(review): mainClass does not match the
                                 TestSparkSQLHive object seen in this project's
                                 sources — confirm TestDemo.TestStreaming is the
                                 intended entry point. -->
                            <mainClass>TestDemo.TestStreaming</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
        <!-- Declared explicitly: the application code imports
             org.apache.spark.sql directly; previously spark-sql was only
             available transitively through spark-hive. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
    </dependencies>
</project>