Spark Streaming Part 1: Writing Data from Spark Streaming to HBase with a Concatenated Rowkey

Without further ado, straight to the code.

package com.iflytek.kafka

import com.alibaba.fastjson.JSON
import com.iflytek.kafkaManager.HbaseSink
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.slf4j.LoggerFactory

object WC2Hbase {
  @transient lazy val logger = LoggerFactory.getLogger(this.getClass)
  def send2HbaseMain(ssc: StreamingContext): Unit = {
    ssc.checkpoint("hdfs://cdh01:8020/user/hive/warehouse/checkpointed/sdf")
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "cdh01:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "xx001",
      "auto.offset.reset" -> "latest", //earliest latest
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array("pd_ry_txjl")
    val stream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        PreferConsistent,
        Subscribe[String, String](topics, kafkaParams))
    val value: DStream[String] = stream.map(_.value())

    val mapDS = value.map(x => {
      val nObject = JSON.parseObject(x)
      val bodyObject1 = nObject.getJSONObject("body")
      val bodyObject2 = bodyObject1.getJSONObject("body")
      val xqbm = bodyObject2.get("name").toString
      // The random-number prefix here is just for demonstration: in practice
      // this is where you would build the real rowkey, e.g. with a time-based
      // prefix (see the buildRowKey sketch below).
      (scala.util.Random.nextInt(10) + "_" + xqbm, 1)
    })
    
    mapDS.foreachRDD(rdd=>{
      if(!rdd.isEmpty()){
        send2Hbase(rdd)
      }
    })

  }
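
  // Hypothetical helper, not in the original post: one way to build the rowkey
  // that the comment above alludes to. A hash-based salt spreads writes across
  // regions, and an inverted timestamp makes the most recent rows sort first.
  def buildRowKey(name: String, saltBuckets: Int = 10): String = {
    val salt = (name.hashCode & Int.MaxValue) % saltBuckets
    val invertedTs = Long.MaxValue - System.currentTimeMillis()
    s"${salt}_${invertedTs}_$name"
  }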

  def send2Hbase(rdd: RDD[(String, Int)]): Unit = {
    if (!rdd.isEmpty) {
      rdd.foreachPartition(partition => {
        // one shared connection per executor JVM (cached in HbaseSink);
        // open the table once per partition instead of once per record
        val conn = HbaseSink.getHbaseConn
        val table = conn.getTable(TableName.valueOf("xy"))
        try {
          partition.foreach { case (rowKey, count) =>
            val put = new Put(Bytes.toBytes(rowKey))
            put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(count.toString))
            table.put(put)
          }
        } catch {
          case e: Exception => logger.error("failed to write partition to HBase", e)
        } finally {
          table.close()
        }
      })
      logger.info("finished writing batch to HBase")
    }
  }
}
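
For completeness, a minimal driver might look like the sketch below. It is not part of the original post; the app name and the 5-second batch interval are assumptions.

package com.iflytek.kafka

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object WC2HbaseApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("WC2Hbase")
    val ssc = new StreamingContext(conf, Seconds(5))
    WC2Hbase.send2HbaseMain(ssc) // wire up the Kafka -> HBase pipeline
    ssc.start()                  // start consuming
    ssc.awaitTermination()       // block until the job is stopped
  }
}
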
package com.iflytek.kafkaManager

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}
import org.slf4j.LoggerFactory

object HbaseSink extends Serializable {
  @transient lazy val logger = LoggerFactory.getLogger(this.getClass)
  private val conf = HBaseConfiguration.create()

  conf.set("hbase.zookeeper.quorum", "cdh01,cdh02,cdh03")
  conf.set("hbase.zookeeper.property.clientPort", "2181")
  conf.set("zookeeper.znode.parent", "/hbase")

  // one connection per JVM, created when the object is first referenced
  // in the executor and then shared by every partition it processes
  private val conn = ConnectionFactory.createConnection(conf)

  // return the cached HBase connection
  def getHbaseConn: Connection = conn
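
  // A small addition, not in the original post: close the cached connection
  // (and its ZooKeeper session) when the executor JVM shuts down.
  sys.addShutdownHook {
    if (!conn.isClosed) conn.close()
  }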

}

The pom is as follows:

    <properties>
        <spark.version>2.3.2</spark.version>
        <scala.version>2.11.8</scala.version>
        <hbase.version>1.2.1</hbase.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.0</version>
            <scope>compile</scope>
        </dependency>
        
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.31</version>
            <scope>compile</scope>
        </dependency>

        <dependency>
            <groupId>c3p0</groupId>
            <artifactId>c3p0</artifactId>
            <version>0.9.1.2</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>com.jolbox</groupId>
            <artifactId>bonecp</artifactId>
            <version>0.8.0.RELEASE</version>
            <scope>compile</scope>
        </dependency>


        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>3.4.13</version>
            <scope>compile</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>${hbase.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
            <scope>compile</scope>
        </dependency>
        <!--
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.2</version>
        </dependency>
        -->
        <!-- the guava version must match the hadoop version -->
        <!--
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>18.0</version>
        </dependency>
        -->

        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
            <scope>compile</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.0</version>
                <executions>
                    <execution>
                        <id>compile-scala</id>
                        <phase>compile</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>test-compile-scala</id>
                        <phase>test-compile</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <scalaVersion>2.11.8</scalaVersion>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <compilerArgs>
                        <arg>-extdirs</arg>
                        <arg>${project.basedir}/lib</arg>
                    </compilerArgs>
                </configuration>
            </plugin>
        </plugins>
    </build>
Reposted from blog.csdn.net/qq_38740498/article/details/103473563