SparkSql篇1:SparkSql读写Phoenix
本文使用Phoenix4.7.0,不支持spark2,如需支持spark2请使用Phoenix4.10.0以后版本
废话不多说,直接上干货
package com.iflytek.phoneix
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.spark.sql.{DataFrame, SparkSession}
object sparkPhoenix2 {
  /**
   * Minimal example: read a table from Phoenix, cache it as a temp view,
   * then write the rows back to a second Phoenix table.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    // Hive warehouse location on HDFS.
    val warehouse = "hdfs://cdh1:8020/user/hive/warehouse/carbon.store"
    // Build a standard SparkSession. The original called
    // getOrCreateCarbonSession(warehouse), a CarbonData extension method that
    // no dependency in the accompanying pom provides; getOrCreate() is the
    // stock API and is all a Phoenix example needs. The warehouse path is
    // passed through the standard config key instead.
    val sparkSession = SparkSession.builder()
      .master("local")
      .appName("appName")
      .config("spark.testing.memory", "471859200")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.sql.warehouse.dir", warehouse)
      .enableHiveSupport()
      .getOrCreate()

    // There are several ways Spark can talk to Phoenix; the phoenix-spark
    // DataSource used here is the most convenient.
    // Read the source table from Phoenix via ZooKeeper quorum "cdh01:2181".
    val df2: DataFrame = sparkSession.sqlContext.read
      .format("org.apache.phoenix.spark")
      .option("table", "rxjl")
      .option("zkUrl", "cdh01:2181")
      .load()

    // Register a temporary view and cache it so the query below does not
    // re-scan Phoenix. createOrReplaceTempView avoids the AnalysisException
    // createTempView throws when the view name already exists.
    df2.createOrReplaceTempView("xydate1")
    sparkSession.sqlContext.cacheTable("xydate1")
    val data = sparkSession.sql("select * from xydate1")

    // Write the rows back out to a second Phoenix table.
    data.write
      .format("org.apache.phoenix.spark")
      .mode("overwrite")
      .option("table", "rxjl2")
      .option("zkUrl", "cdh01:2181")
      .save()

    // Release the cached view before shutting the session down.
    sparkSession.sqlContext.uncacheTable("xydate1")
    sparkSession.stop()
  }
}
pom如下:
<!-- Phoenix query engine. Per the note at the top of this post, use 4.10.0+
     for Spark 2 support; 4.7.0 does not support Spark 2. -->
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-core</artifactId>
<version>${phoenix.version}</version>
</dependency>
<!-- Provides the "org.apache.phoenix.spark" DataSource used by the example's
     read/write calls. Must match the phoenix-core version. -->
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-spark</artifactId>
<version>${phoenix.version}</version>
</dependency>
<!-- HBase client libraries Phoenix talks to at runtime; ${hbase.version}
     must match the cluster's HBase version. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
<scope>compile</scope>
</dependency>
<!-- Needed for server-side classes referenced by Phoenix (e.g. the
     mapred/mapreduce output formats imported by the example). -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<scope>compile</scope>
</dependency>