Committing Kafka-Spark offsets to Redis (Kafka 1.0)

Kafka version: 1.0.0

Spark connector: spark-streaming-kafka-0-10_2.11
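Offsets are kept in a Redis hash whose key is "topic:groupId"; each hash field is a partition number and its value is that partition's stored offset. The JedisUtil helper referenced by the class is not included in the original post; below is a minimal sketch, assuming a Redis instance on localhost:6379, of what it might look like:

import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}

// Hypothetical stand-in for the JedisUtil helper used by KafkaManagerByRedis;
// the original post does not show it. Assumes Redis runs on localhost:6379.
object JedisUtil {
  private val pool = new JedisPool(new JedisPoolConfig, "localhost", 6379)
  private val instance = new JedisUtil(pool)
  def getInstance(): JedisUtil = instance
}

class JedisUtil(pool: JedisPool) {
  def getJedis: Jedis = pool.getResource
}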

import java.util

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.reflect.ClassTag

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer, NoOffsetForPartitionException}
import org.apache.kafka.common.TopicPartition
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, KafkaUtils, OffsetRange}

class KafkaManagerByRedis(kafkaParams: Map[String, Object]) extends Serializable {

  // Redis client from the pooled helper (see the JedisUtil sketch above)
  private val jedis = JedisUtil.getInstance().getJedis

  /**
   * Creates a direct stream. On first start (no offsets in Redis) consumption
   * begins at the earliest offset; on restart it resumes from the offsets
   * stored in Redis, corrected against the broker's current offset range.
   **/
  def createDirectStream[K: ClassTag, V: ClassTag](ssc: StreamingContext, topics: Seq[String]): InputDStream[ConsumerRecord[K, V]] = {
    // 1: read the offsets stored in Redis
    val groupId = kafkaParams("group.id").toString
    val topic = topics.head
    val redisKey = topic + ":" + groupId

    val offsetInfo = jedis.hgetAll(redisKey)
    val stream: InputDStream[ConsumerRecord[K, V]] =
      if (offsetInfo.size() == 0) {
        // First start: nothing stored yet, so consume from the earliest offset
        val newKafkaParams = mutable.Map[String, Object]()
        newKafkaParams ++= kafkaParams
        newKafkaParams.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
        KafkaUtils.createDirectStream[K, V](
          ssc,
          PreferConsistent,
          Subscribe[K, V](topics, newKafkaParams)
        )
      } else {
        // Restart: resume from the (corrected) offsets read back from Redis
        val topicPartition: Map[TopicPartition, Long] = readOffset(topic, groupId)
        KafkaUtils.createDirectStream[K, V](
          ssc,
          PreferConsistent,
          Subscribe[K, V](topics, kafkaParams, topicPartition)
        )
      }

    stream
  }

  /**
   * Reads offsets back from Redis.
   *
   * @param topic   topic name
   * @param groupId consumer group
   * @return Map[TopicPartition, Long]
   **/
  private def readOffset(topic: String, groupId: String): Map[TopicPartition, Long] = {
    val topicPartitionMap = collection.mutable.HashMap.empty[TopicPartition, Long]
    // Offsets live in a Redis hash: key = "topic:groupId", field = partition, value = offset
    val redisKey = topic + ":" + groupId
    val map: util.Map[String, String] = jedis.hgetAll(redisKey)
    val topicAndPartitionMaps: mutable.Map[String, String] = map.asScala
    topicAndPartitionMaps.foreach { partitionAndOffset =>
      // Rebuild a (TopicPartition, offset) pair from each hash field
      topicPartitionMap.put(new TopicPartition(topic, partitionAndOffset._1.toInt), partitionAndOffset._2.toLong)
    }
    // Correct invalid offsets: if current < earliest or current > latest
    // (e.g. after log retention removed old segments), fall back to earliest
    val earliestOffsets = getEarliestOffsets(kafkaParams, topic)
    val latestOffsets = getLatestOffsets(kafkaParams, List(topic))
    for ((k, v) <- topicPartitionMap) {
      val current = v
      val earliest = earliestOffsets(k)
      val latest = latestOffsets(k)
      if (current < earliest || current > latest) {
        topicPartitionMap.put(k, earliest)
      }
    }
    topicPartitionMap.toMap
  }
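  // A hypothetical illustration (not from the original post) of the hash layout
  // readOffset consumes, for topic "demo-topic" and group "demo-group":
  //
  //   HGETALL demo-topic:demo-group
  //   "0" -> "1523"   (partition 0 resumes at offset 1523)
  //   "1" -> "980"    (partition 1 resumes at offset 980)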


  /**
   * Fetches the earliest available offset for every partition of the topic.
   *
   * @param kafkaParams consumer configuration
   * @param topic       topic name
   * @return Map[TopicPartition, Long]
   */
  private def getEarliestOffsets(kafkaParams: Map[String, Object], topic: String): Map[TopicPartition, Long] = {
    val newKafkaParams = mutable.Map[String, Object]()
    newKafkaParams ++= kafkaParams
    newKafkaParams.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    // Throwaway consumer used only to query positions
    val consumer = new KafkaConsumer[String, Array[Byte]](newKafkaParams.asJava)
    consumer.subscribe(Seq(topic).asJava)
    try {
      consumer.poll(0)
    } catch {
      case _: NoOffsetForPartitionException =>
      // TODO: alerting (e.g. email) could be triggered here
    }
    // Fetch the assigned partitions
    val topicp = consumer.assignment()
    // Pause consumption so no records are actually fetched
    consumer.pause(topicp)
    // Seek to the beginning of every partition and record the position
    consumer.seekToBeginning(topicp)
    val earliestOffsetMap = topicp.asScala.map(tp => tp -> consumer.position(tp)).toMap
    consumer.unsubscribe()
    consumer.close()
    earliestOffsetMap
  }


  /**
   * Fetches the latest offset for every partition of the given topics.
   *
   * @param kafkaParams consumer configuration
   * @param topics      topic names
   * @return Map[TopicPartition, Long]
   */
  private def getLatestOffsets(kafkaParams: Map[String, Object], topics: Seq[String]): Map[TopicPartition, Long] = {
    val newKafkaParams = mutable.Map[String, Object]()
    newKafkaParams ++= kafkaParams
    newKafkaParams.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest")
    // Throwaway consumer used only to query positions
    val consumer = new KafkaConsumer[String, Array[Byte]](newKafkaParams.asJava)
    consumer.subscribe(topics.asJava)
    try {
      consumer.poll(0)
    } catch {
      case _: NoOffsetForPartitionException =>
      // TODO: alerting (e.g. email) could be triggered here
    }
    // Fetch the assigned partitions
    val topicp = consumer.assignment()
    // Pause consumption so no records are actually fetched
    consumer.pause(topicp)
    // Seek to the end of every partition and record the position
    consumer.seekToEnd(topicp)
    val latestOffsetMap = topicp.asScala.map(tp => tp -> consumer.position(tp)).toMap
    consumer.unsubscribe()
    consumer.close()
    latestOffsetMap
  }


  /**
   * Persists offsets to Redis after a batch has been processed.
   * With storeOffset = true the end offset (untilOffset) is stored;
   * otherwise the start offset (fromOffset) is stored.
   */
  def persistOffset[K, V](rdd: RDD[ConsumerRecord[K, V]], storeOffset: Boolean = true, topic: String): Unit = {

    val groupId = kafkaParams("group.id").toString

    val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

    offsetRanges.foreach { offsetRange =>
      // Must use the same key format as readOffset/createDirectStream: "topic:groupId"
      val redisKey = offsetRange.topic + ":" + groupId
      val data = if (storeOffset) offsetRange.untilOffset else offsetRange.fromOffset
      jedis.hset(redisKey, offsetRange.partition.toString, data.toString)
      println("topic: " + offsetRange.topic + ", partition: " + offsetRange.partition +
        ", consumed from " + offsetRange.fromOffset + " to " + offsetRange.untilOffset +
        ", count: " + offsetRange.count())
    }
  }
}
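For context, here is a minimal driver sketch (the broker address, topic, and group names are made-up placeholders, not from the original post) showing how the class is typically wired up: restore offsets when building the stream, process each batch, then persist the batch's offsets only after processing succeeds:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object OffsetToRedisDemo {
  def main(args: Array[String]): Unit = {
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[org.apache.kafka.common.serialization.StringDeserializer],
      "value.deserializer" -> classOf[org.apache.kafka.common.serialization.StringDeserializer],
      "group.id" -> "demo-group",
      // auto-commit stays off: offsets are managed manually in Redis
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val conf = new SparkConf().setAppName("OffsetToRedisDemo").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))

    val manager = new KafkaManagerByRedis(kafkaParams)
    val topics = Seq("demo-topic")
    val stream = manager.createDirectStream[String, String](ssc, topics)

    stream.foreachRDD { rdd =>
      rdd.foreach(record => println(record.value())) // business logic goes here
      // Persist offsets only after the batch succeeded (at-least-once semantics)
      manager.persistOffset(rdd, storeOffset = true, topic = topics.head)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}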


Reprinted from: www.cnblogs.com/hejunhong/p/12081028.html