Custom Flume Interceptor

The agent configuration below pulls events from a Kafka topic, runs them through a custom interceptor, and writes them to HDFS as rolled JSON files.


# example.conf: A single-node Flume configuration

# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.kafka.bootstrap.servers = hdp4:6667,hdp5:6667,hdp6:6667
a1.sources.r1.kafka.consumer.group.id = flume_test
a1.sources.r1.kafka.topics = IOT_DS_DATA_BACK2

a1.sources.r1.interceptors = i1
# Custom interceptor (a Java sketch of this class follows the config)
a1.sources.r1.interceptors.i1.type = com.springboot.MongoDB.flume.kafkaInterceptor$Builder
# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hdp1.hdp:8020/IOT/10/100010010/20%y-%m-%d/%H-%M
#a1.sinks.k1.hdfs.path = hdfs://hdp1.hdp:8020/IOT/%{orgId100}/%{orgId}/20%y-%m-%d/%H-%M
# File type of the generated files; the default is SequenceFile, DataStream writes plain text
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.fileType = DataStream
# File name prefix
a1.sinks.k1.hdfs.filePrefix = data
# File name suffix
a1.sinks.k1.hdfs.fileSuffix = .json
# Seconds to wait before rolling the current file
a1.sinks.k1.hdfs.rollInterval = 1800
# File size that triggers a roll, in bytes (800000000 bytes is roughly 763 MB)
a1.sinks.k1.hdfs.rollSize = 800000000
# Number of events written before rolling the file
a1.sinks.k1.hdfs.rollCount = 70000
# Number of threads the HDFS sink uses for HDFS I/O operations
a1.sinks.k1.hdfs.threadsPoolSize = 300
# Number of threads for time-based file rolling (default: 1)
a1.sinks.k1.hdfs.rollTimerPoolSize = 10
# Number of events written per flush to HDFS
a1.sinks.k1.hdfs.batchSize = 10000

# Round timestamps down to 10-minute boundaries, so a new directory is created every 10 minutes
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
# Use the local time (rather than an event header timestamp) when substituting the path escapes
a1.sinks.k1.hdfs.useLocalTimeStamp = true

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 8000000
a1.channels.c1.transactionCapacity = 100000
a1.channels.c1.keep-alive = 100


# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
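
The config references com.springboot.MongoDB.flume.kafkaInterceptor, but the original post does not include its source. Below is a minimal sketch of what such an interceptor could look like, assuming the Kafka message body is JSON carrying an orgId field that the commented-out %{orgId100}/%{orgId} path escapes would consume; the field name, the substring grouping, and the hand-rolled JSON extraction are assumptions, not the author's code.

```java
package com.springboot.MongoDB.flume;

import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;

// Sketch of the custom interceptor named in the config. The lowercase class
// name is kept only so it matches the configured FQCN.
public class kafkaInterceptor implements Interceptor {

    @Override
    public void initialize() {
        // No state to set up.
    }

    @Override
    public Event intercept(Event event) {
        Map<String, String> headers = event.getHeaders();
        String body = new String(event.getBody(), StandardCharsets.UTF_8);
        // Naive extraction of "orgId":"..." from the JSON body; a real
        // implementation would use a JSON library such as Jackson.
        String orgId = extractField(body, "orgId");
        if (orgId != null && orgId.length() >= 2) {
            headers.put("orgId", orgId);
            // Parent directory, e.g. "10" for orgId "100010010"; this
            // grouping rule is a guess based on the hardcoded path above.
            headers.put("orgId100", orgId.substring(0, 2));
        }
        return event;
    }

    @Override
    public List<Event> intercept(List<Event> events) {
        for (Event event : events) {
            intercept(event);
        }
        return events;
    }

    @Override
    public void close() {
        // Nothing to release.
    }

    private static String extractField(String json, String field) {
        String key = "\"" + field + "\":\"";
        int start = json.indexOf(key);
        if (start < 0) {
            return null;
        }
        start += key.length();
        int end = json.indexOf('"', start);
        return end > start ? json.substring(start, end) : null;
    }

    // Flume instantiates interceptors through this nested builder, which is
    // why the config references kafkaInterceptor$Builder.
    public static class Builder implements Interceptor.Builder {
        @Override
        public Interceptor build() {
            return new kafkaInterceptor();
        }

        @Override
        public void configure(Context context) {
            // No interceptor-level parameters in this sketch.
        }
    }
}
```

Setting headers in the interceptor is what would make the commented-out dynamic hdfs.path line work: the HDFS sink substitutes %{orgId100} and %{orgId} from each event's headers at write time.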

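To run this, build the interceptor into a jar, place it in Flume's lib directory (or under plugins.d), and start the agent with: flume-ng agent --conf conf --conf-file example.conf --name a1. The agent name a1 must match the prefix used throughout the config.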