【flume】自定义source

1.java代码

package cn.edu360.flume.source;

import org.apache.commons.io.FileUtils;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.source.AbstractSource;
import org.apache.flume.source.ExecSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.Charset;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Flume source that tails a single file.
 *
 * <p>A background thread keeps reading the file; every new line becomes one
 * event. After each delivered event the current read position (byte offset)
 * is written to a position file, so that after a restart reading resumes
 * where the previous run stopped instead of starting over.
 *
 * <p>NOTE(review): file truncation/rotation is not detected, and a final line
 * that has not yet been terminated by a newline may be shipped in pieces.
 */
public class TailFileSource extends AbstractSource implements EventDrivenSource, Configurable {
    private static final Logger logger = LoggerFactory.getLogger(TailFileSource.class);

    /** Poll interval used when {@code interval} is not configured. */
    private static final long DEFAULT_INTERVAL_MS = 2000L;

    /** Charset used when {@code charset} is not configured. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    // File the data is read from.
    private String filePath;

    // File the read offset is persisted to.
    private String posiPath;

    // Pause (milliseconds) between polls when no new data is available.
    private Long interval;

    // Charset of the tailed file.
    private String charset;

    private FileRunner fileRunner;

    private ExecutorService executor;

    /**
     * Reads the source settings from the agent configuration, e.g.
     * <pre>
     * a1.sources.r1.type = cn.edu360.flume.source.TailFileSource
     * a1.sources.r1.filePath = /root/modules/nginx-1.14.2/logs/access.log
     * a1.sources.r1.posiFile = /root/modules/apache-flume-1.8.0-bin/posi.txt
     * a1.sources.r1.interval = 2000
     * a1.sources.r1.charset = UTF-8
     * </pre>
     * {@code filePath} and {@code posiFile} are required; {@code interval}
     * defaults to 2000 ms and {@code charset} to UTF-8.
     *
     * @param context the configuration of this source
     * @throws IllegalArgumentException if a required key is missing, the
     *         interval is negative, or the charset name is invalid/unsupported
     */
    @Override
    public void configure(Context context) {
        filePath = context.getString("filePath");
        posiPath = context.getString("posiFile");
        interval = context.getLong("interval", DEFAULT_INTERVAL_MS);
        charset = context.getString("charset", DEFAULT_CHARSET);

        if (filePath == null || filePath.trim().isEmpty()) {
            throw new IllegalArgumentException("TailFileSource requires the 'filePath' property");
        }
        if (posiPath == null || posiPath.trim().isEmpty()) {
            throw new IllegalArgumentException("TailFileSource requires the 'posiFile' property");
        }
        if (interval < 0) {
            throw new IllegalArgumentException("'interval' must not be negative: " + interval);
        }
        // Fail at configuration time rather than later on the worker thread.
        Charset.forName(charset);
    }

    /**
     * Starts the single worker thread that tails the configured file.
     */
    @Override
    public synchronized void start() {
        logger.info("TailFile source starting ...");
        // A pool with exactly one thread: the file is read sequentially.
        this.executor = Executors.newSingleThreadExecutor();

        // The channel processor is what hands events over to the channel(s).
        ChannelProcessor channelProcessor = getChannelProcessor();
        fileRunner = new FileRunner(filePath, posiPath, interval, charset, channelProcessor);

        executor.execute(fileRunner);
        super.start();
    }

    /**
     * Asks the worker to finish and waits until the executor has terminated.
     */
    @Override
    public synchronized void stop() {
        // Tell the worker loop to exit.
        if (fileRunner != null) {
            fileRunner.setFlag(false);
        }

        if (executor != null) {
            // shutdown(): no new tasks are accepted, already submitted tasks run
            // to completion. awaitTermination(): waits up to the timeout and
            // reports whether the executor has terminated.
            executor.shutdown();
            while (!this.executor.isTerminated()) {
                logger.debug("Waiting for exec executor service to stop");

                try {
                    this.executor.awaitTermination(500L, TimeUnit.MILLISECONDS);
                } catch (InterruptedException e) {
                    logger.debug("Interrupted while waiting for exec executor service to stop. Just exiting.");
                    Thread.currentThread().interrupt();
                    // With the interrupt flag set awaitTermination would throw
                    // again immediately; leave instead of spinning forever.
                    break;
                }
            }
        }
        super.stop();
    }

    /**
     * Worker that reads new lines from the file, sends them to the channel and
     * persists the read offset after every delivered line.
     */
    private static class FileRunner implements Runnable {
        // RandomAccessFile.readLine() turns each byte into one char, i.e. it
        // behaves like an ISO-8859-1 decode; this charset recovers the raw bytes.
        private static final Charset RAW_BYTES = Charset.forName("ISO-8859-1");

        // File the data is read from.
        private final String filePath;

        // Pause (milliseconds) between polls when there is nothing to read.
        private final long interval;

        // Charset of the tailed file.
        private final Charset charset;

        // Offset of the first byte that has not been delivered yet.
        private long offset;

        // Reader for the tailed file; opened lazily on the worker thread.
        private RandomAccessFile raf;

        // Position file.
        private final File pfile;

        private final ChannelProcessor channelProcessor;

        // Written by the thread calling stop(), read by the worker thread.
        private volatile boolean flag = true;

        /**
         * Creates the runner and restores the offset saved by a previous run.
         *
         * @param filePath         file to tail
         * @param posiPath         position file (created if it does not exist)
         * @param interval         pause in milliseconds when no data is available
         * @param charset          name of the charset of the tailed file
         * @param channelProcessor target for the produced events
         */
        private FileRunner(String filePath, String posiPath, long interval, String charset, ChannelProcessor channelProcessor) {
            this.filePath = filePath;
            this.interval = interval;
            this.charset = Charset.forName(charset);
            this.channelProcessor = channelProcessor;
            this.pfile = new File(posiPath);
            this.offset = restoreOffset();
        }

        /**
         * Returns the offset stored in the position file, or 0 when the file is
         * new, empty or unreadable. The position file is created if missing.
         */
        private long restoreOffset() {
            if (!pfile.exists()) {
                try {
                    pfile.createNewFile();
                } catch (IOException e) {
                    logger.error("create position file error...", e);
                }
                return 0L;
            }

            try {
                String stored = FileUtils.readFileToString(pfile);
                if (stored == null || stored.trim().isEmpty()) {
                    return 0L;
                }
                // Tolerate surrounding whitespace such as a trailing newline.
                long saved = Long.parseLong(stored.trim());
                return Math.max(saved, 0L);
            } catch (IOException e) {
                logger.error("read position file error...", e);
            } catch (NumberFormatException e) {
                logger.error("position file " + pfile + " does not contain a valid offset, starting from 0", e);
            }
            return 0L;
        }

        /**
         * Polls the file until asked to stop: ships a line when one is
         * available, otherwise sleeps for the configured interval.
         */
        @Override
        public void run() {
            try {
                while (flag) {
                    if (!shipNextLine()) {
                        Thread.sleep(interval);
                    }
                }
            } catch (InterruptedException e) {
                logger.error("thread sleep error...", e);
                // Keep the interrupt visible to the executor and stop working.
                Thread.currentThread().interrupt();
            } finally {
                closeReader();
            }
        }

        /**
         * Reads one line, sends it as an event and saves the new offset.
         *
         * @return {@code true} if a line was delivered; {@code false} if there
         *         was nothing to read or an error occurred (caller backs off)
         */
        private boolean shipNextLine() {
            try {
                if (raf == null) {
                    // Also covers a log file that did not exist at start-up.
                    raf = openAtOffset();
                }

                String rawLine = raf.readLine();
                if (rawLine == null) {
                    return false;
                }

                // Undo readLine()'s byte-to-char widening, then decode properly.
                String text = new String(rawLine.getBytes(RAW_BYTES), charset);
                Event event = EventBuilder.withBody(text, charset);
                channelProcessor.processEvent(event);

                // Only now is the line delivered: advance and persist the offset.
                offset = raf.getFilePointer();
                FileUtils.writeStringToFile(pfile, Long.toString(offset));
                return true;
            } catch (IOException e) {
                logger.error("read line error...", e);
                return false;
            } catch (RuntimeException e) {
                // processEvent reports channel failures (e.g. a full channel) with
                // an unchecked exception; without this catch the worker would die.
                logger.error("failed to deliver event, line will be retried", e);
                // Re-open at the last committed offset so the line is read again.
                closeReader();
                return false;
            }
        }

        /** Opens the tailed file positioned at the current offset. */
        private RandomAccessFile openAtOffset() throws IOException {
            RandomAccessFile opened = new RandomAccessFile(filePath, "r");
            try {
                opened.seek(offset);
            } catch (IOException e) {
                try {
                    opened.close();
                } catch (IOException ignored) {
                    // The seek failure is the error worth reporting.
                }
                throw e;
            }
            return opened;
        }

        /** Closes the reader, if open, so the file handle is released. */
        private void closeReader() {
            if (raf == null) {
                return;
            }
            try {
                raf.close();
            } catch (IOException e) {
                logger.warn("failed to close " + filePath, e);
            } finally {
                raf = null;
            }
        }

        /**
         * Enables or disables the polling loop; {@code false} makes
         * {@link #run()} return after the current iteration.
         */
        public void setFlag(boolean flag) {
            this.flag = flag;
        }
    }
}

2.将写好的java代码打包

3.将打包完成的jar包上传到安装flume的虚拟机上,放到flume安装目录的lib目录下

4.在flume的安装目录下,创建属于自己的conf文件

nginx-logger.conf

#定义agent名, source、channel、sink的名称
a1.sources = r1
a1.channels = c1
a1.sinks = k1

#具体定义source
#监听具体的class类(全类名)
a1.sources.r1.type = cn.edu360.flume.source.TailFileSource
#被监听的日志文件路径
a1.sources.r1.filePath = /root/modules/nginx-1.14.2/logs/access.log
#偏移量文件(存在读取,不存在创建)
a1.sources.r1.posiFile = /root/modules/apache-flume-1.8.0-bin/posi.txt
#时间间隔
a1.sources.r1.interval = 2000
#编码集
a1.sources.r1.charset = UTF-8


a1.sinks.k1.type = logger

#Event数据存储在内存中
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
#Channel每次提交的Event数量
a1.channels.c1.transactionCapacity = 100

a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

5.启动nginx

6.启动flume

bin/flume-ng agent -n a1 -c conf -f myconf/nginx-logger.conf -Dflume.root.logger=INFO,console

7.刷新nginx页面查看flume是否可以准确的捕获日志信息

猜你喜欢

转载自blog.csdn.net/weixin_39227099/article/details/86584711