SparkSql Part 2: Reading and Writing Elasticsearch with SparkSql
Without further ado, here is the working code:
package com.iflytek.elasticsearch

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.elasticsearch.spark.sql._

object sparkEs {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession.builder()
      .master("local")
      .appName("appName")
      .config("es.index.auto.create", "true")
      .config("es.nodes", "cdh01:9200")
      .config("es.write.operation", "index")
      .config("spark.testing.memory", "471859200")
      .getOrCreate()

    // Read from ES with Spark: a match_all query pulls back every document in the index
    val match_all =
      """|{"query":{"match_all":{}}}
      """.stripMargin
    val xydate1: DataFrame = sparkSession.esDF("xydate_201909/capture", match_all)

    // Register a temporary view (a Spark temp view, not a Hive table) and cache it
    xydate1.createTempView("xydate1")
    sparkSession.sqlContext.cacheTable("xydate1")

    // Query the ES-backed temporary view with SQL, just as you would a Hive table
    val xydate3 = sparkSession.sql("select * from xydate1")

    // Write the query result back to ES; es.mapping.id makes the ES _id equal to our jlId column
    EsSparkSQL.saveToEs(xydate3, "xydate3/capture", Map("es.mapping.id" -> "jlId"))

    sparkSession.sqlContext.uncacheTable("xydate1")
    sparkSession.stop()
  }
}
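As a side note, the same round trip can usually be expressed through Spark's generic DataFrame reader/writer API instead of esDF/saveToEs. This is a minimal sketch, assuming the elasticsearch-spark connector from the pom below is on the classpath and registers the org.elasticsearch.spark.sql data source; the index names and the jlId column are simply reused from the example above:

// Sketch of the same read/write via the generic DataFrame source API
val df = sparkSession.read
  .format("org.elasticsearch.spark.sql")
  .option("es.nodes", "cdh01:9200")
  .option("es.query", """{"query":{"match_all":{}}}""")
  .load("xydate_201909/capture")

df.write
  .format("org.elasticsearch.spark.sql")
  .option("es.mapping.id", "jlId")        // use the jlId column as the ES _id
  .option("es.write.operation", "index")
  .mode("append")
  .save("xydate3/capture")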
The relevant pom.xml entries are as follows:
<properties>
    <spark.version>2.3.2</spark.version>
    <scala.version>2.11.8</scala.version>
    <es.version>5.4.3</es.version>
</properties>

<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>${es.version}</version>
</dependency>
<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch-spark-20_2.11</artifactId>
    <version>${es.version}</version>
</dependency>
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>transport</artifactId>
    <version>${es.version}</version>
</dependency>
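One practical note on the read side: the query string handed to esDF is sent to Elasticsearch verbatim, so anything more selective than match_all is filtered on the ES side and only the matching documents come back as a DataFrame. A small sketch of that idea; the captureTime field and the date range are hypothetical, purely to illustrate the shape of the query:

// Hedged sketch: push a more selective query down to Elasticsearch instead of match_all.
// "captureTime" and the date range are made-up illustrations, not from the example above.
val filteredQuery =
  """|{"query":{"range":{"captureTime":{"gte":"2019-09-01","lt":"2019-10-01"}}}}
  """.stripMargin
val xydateFiltered: DataFrame = sparkSession.esDF("xydate_201909/capture", filteredQuery)
xydateFiltered.createTempView("xydate_filtered")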