实现第一步创建Kafka客户端生产者及消费者
江湖规矩,先贴Pom
注意版本问题:所有带 Scala 后缀的依赖必须使用同一个 Scala 二进制版本(本文统一为 2.11),混入 _2.12 的构件会在运行时报二进制不兼容错误
<properties>
<scala.version>2.11</scala.version>
<spark.version>2.4.3</spark.version>
<hadoop.version>2.6.0</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.47</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- NOTE(review): HBase artifacts below mix 0.98.12-hadoop2 (client/server)
     with 1.2.6 (protocol/shaded-client). Mixed HBase versions on one
     classpath are likely to conflict - align them before production use. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>0.98.12-hadoop2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>0.98.12-hadoop2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-protocol</artifactId>
<version>1.2.6</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-shaded-client</artifactId>
<version>1.2.6</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Use ${scala.version} instead of a hard-coded _2.11 suffix so a single
     property controls the Scala binary version for every artifact. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.specs</groupId>
<artifactId>specs</artifactId>
<version>1.2.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>net.jpountz.lz4</groupId>
<artifactId>lz4</artifactId>
<version>1.3.0</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.11.0.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->
<!-- BUG FIX: was kafka_2.12, which pulls Scala 2.12 binaries into a
     Scala 2.11 build (scala.version=2.11) and breaks at runtime with
     binary-incompatibility errors. -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_${scala.version}</artifactId>
<version>0.11.0.0</version>
</dependency>
<!-- sparkstreaming integration with flume -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-flume_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-flume-sink_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.5</version>
</dependency>
<!-- <dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
-->
<!-- https://mvnrepository.com/artifact/org.apache.avro/avro -->
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.8.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.avro/avro-ipc -->
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-ipc</artifactId>
<version>1.8.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/log4j/log4j -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>org.apache.flume.flume-ng-clients</groupId>
<artifactId>flume-ng-log4jappender</artifactId>
<version>1.6.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.58</version>
</dependency>
</dependencies>
启动zookeeper
zkServer.sh start
启动三台kafka集群
kafka-server-start.sh /opt/kafka/kafka/config/server.properties
创建topic
kafka-topics.sh --create --zookeeper hadoop:2181 --replication-factor 1 --partitions 1 --topic day3
编写生产者Application
import java.util.{Properties, Random}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import scala.collection.mutable.Map
/**
 * Generates one random ad-click log line per second and publishes it to the
 * Kafka topic "day3". Log format: "timestamp province city userId adId".
 */
object Kafka_Productor {
  def main(args: Array[String]): Unit = {
    // Producer configuration: broker address plus key/value serializers.
    val props = new Properties()
    props.put("bootstrap.servers", "hadoop:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    // Static lookup data and the RNG, hoisted out of the loop — the original
    // rebuilt the province/city tables and a new Random on every iteration.
    val provinces = List("beijing", "henan")
    val cities = Map[String, List[String]]()
    cities.put("beijing", List("chaoyang", "haidian"))
    cities.put("henan", List("zhengzhou", "kaifeng"))
    val ran = new Random()

    // BUG FIX: the original constructed a brand-new KafkaProducer inside the
    // while(true) loop on every iteration and never closed any of them,
    // leaking sockets and buffer memory. Create the producer exactly once;
    // since the loop never exits normally, close it via a JVM shutdown hook.
    val productor = new KafkaProducer[String, String](props)
    sys.addShutdownHook(productor.close())

    while (true) {
      val timestamp = System.currentTimeMillis()
      // Random user id and ad id in [0, 4).
      val useId = ran.nextInt(4)
      val adid = ran.nextInt(4)
      // Pick a random province, then a random city belonging to it.
      val province = provinces(ran.nextInt(2))
      val city = cities.getOrElse(province, List(""))(ran.nextInt(2))
      // One log record: "timestamp province city userId adId".
      val log = s"$timestamp $province $city $useId $adid"
      println(log)
      // Send to topic "day3"; value is the log line, no explicit key.
      productor.send(new ProducerRecord[String, String]("day3", log))
      // Throttle to one record per second.
      Thread.sleep(1000)
    }
  }
}
编写消费者Application
import java.time.Duration
import java.util
import java.util.Properties
import org.apache.kafka.clients.consumer.{ConsumerRecords, KafkaConsumer}
/**
 * Consumes the "day3" topic in an endless poll loop and prints each record's
 * value to stdout.
 */
object Kafka_Consumer {
  def main(args: Array[String]): Unit = {
    // Consumer configuration: broker address, consumer group id, and
    // key/value deserializers.
    val props = new Properties()
    props.put("bootstrap.servers", "hadoop:9092")
    props.setProperty("group.id", "day3")
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")

    val consumer = new KafkaConsumer[String, String](props)
    // Subscribe takes a list of topic names.
    consumer.subscribe(util.Arrays.asList("day3"))

    while (true) {
      // Block up to 1s waiting for a batch (kafka-clients 0.11 exposes
      // poll(long millis), hence the Duration -> millis conversion).
      val records: ConsumerRecords[String, String] = consumer.poll(Duration.ofMillis(1000).toMillis)
      // BUG FIX: the original wrote `while(records.iterator().hasNext)`,
      // which creates a FRESH iterator on every condition check — for any
      // non-empty batch, hasNext was always true and next() always returned
      // the first record, so the loop spun forever printing one record.
      // Obtain the iterator exactly once and drain it.
      val it = records.iterator()
      while (it.hasNext) {
        println(it.next().value())
      }
    }
  }
}
下篇 https://blog.csdn.net/qq_44868502/article/details/103515122