1.java代码
package cn.edu360.flume.source;
import org.apache.commons.io.FileUtils;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.source.AbstractSource;
import org.apache.flume.source.ExecSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.Charset;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Flume source that tails a single text file and forwards each new line as an event.
 *
 * <p>A background thread reads the file line by line, hands every line to the channel
 * processor, and then writes the byte offset it has reached into a position file. On the
 * next start the stored offset is read back and reading resumes from that byte position.
 */
public class TailFileSource extends AbstractSource implements EventDrivenSource, Configurable {

    private static final Logger logger = LoggerFactory.getLogger(TailFileSource.class);

    /** Poll interval used when {@code interval} is missing or negative. */
    private static final long DEFAULT_INTERVAL_MS = 2000L;

    /** Charset used when {@code charset} is not configured. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    // File whose appended lines are collected.
    private String filePath;
    // File in which the last delivered byte offset is persisted.
    private String posiPath;
    // Milliseconds to wait before polling again when no new line is available.
    private Long interval = DEFAULT_INTERVAL_MS;
    // Name of the charset the tailed file is written in.
    private String charset = DEFAULT_CHARSET;

    private FileRunner fileRunner;
    private ExecutorService executor;

    /**
     * Reads the source settings from the agent configuration, for example:
     *
     * <pre>
     * a1.sources.r1.type = cn.edu360.flume.source.TailFileSource
     * a1.sources.r1.filePath = /root/modules/nginx-1.14.2/logs/access.log
     * a1.sources.r1.posiFile = /root/modules/apache-flume-1.8.0-bin/posi.txt
     * a1.sources.r1.interval = 2000
     * a1.sources.r1.charset = UTF-8
     * </pre>
     *
     * <p>{@code filePath} and {@code posiFile} are mandatory; {@code interval} falls back to
     * 2000 ms and {@code charset} to UTF-8 when absent.
     *
     * @param context configuration of this source
     * @throws IllegalArgumentException if {@code filePath} or {@code posiFile} is missing
     */
    @Override
    public void configure(Context context) {
        filePath = context.getString("filePath");
        posiPath = context.getString("posiFile");
        if (filePath == null || filePath.trim().isEmpty()) {
            throw new IllegalArgumentException("TailFileSource requires the 'filePath' property");
        }
        if (posiPath == null || posiPath.trim().isEmpty()) {
            throw new IllegalArgumentException("TailFileSource requires the 'posiFile' property");
        }

        Long configuredInterval = context.getLong("interval", DEFAULT_INTERVAL_MS);
        if (configuredInterval == null || configuredInterval < 0L) {
            // Thread.sleep rejects negative values, so never hand one to the runner.
            logger.warn("Invalid interval " + configuredInterval + ", using " + DEFAULT_INTERVAL_MS + " ms");
            configuredInterval = DEFAULT_INTERVAL_MS;
        }
        interval = configuredInterval;
        charset = context.getString("charset", DEFAULT_CHARSET);
    }

    /**
     * Creates the tailing task and runs it on a dedicated single-thread executor.
     */
    @Override
    public synchronized void start() {
        logger.info("TailFile source starting ...");
        // Build the runner first: if it rejects the configuration no executor is left behind.
        fileRunner = new FileRunner(filePath, posiPath, interval, charset, getChannelProcessor());
        executor = Executors.newSingleThreadExecutor();
        executor.execute(fileRunner);
        super.start();
    }

    /**
     * Asks the tailing task to finish and waits for the executor to terminate.
     *
     * <p>If the calling thread is interrupted while waiting, the executor is shut down
     * forcibly, the interrupt status is restored and waiting ends.
     */
    @Override
    public synchronized void stop() {
        // Both may be null when stop() is called without a preceding successful start().
        if (fileRunner != null) {
            fileRunner.setFlag(false);
        }
        if (executor != null) {
            // Graceful shutdown: the runner finishes the line it is working on, then exits.
            executor.shutdown();
            while (!executor.isTerminated()) {
                logger.debug("Waiting for exec executor service to stop");
                try {
                    executor.awaitTermination(500L, TimeUnit.MILLISECONDS);
                } catch (InterruptedException e) {
                    logger.debug("Interrupted while waiting for exec executor service to stop. Just exiting.");
                    // With the interrupt flag set every further awaitTermination would throw
                    // immediately, so leave the loop instead of spinning.
                    executor.shutdownNow();
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
        super.stop();
    }

    /**
     * Task that polls the tailed file, emits one event per line and persists the offset.
     */
    private static class FileRunner implements Runnable {

        /**
         * {@link RandomAccessFile#readLine()} widens every byte to one char, which is the
         * ISO-8859-1 mapping; encoding with this charset gives the original bytes back.
         */
        private static final Charset RAW_BYTES = Charset.forName("ISO-8859-1");

        // File whose appended lines are collected.
        private final String filePath;
        // Path of the position file, kept for log messages.
        private final String posiPath;
        // Milliseconds to sleep when there is nothing to read or after an error.
        private final long interval;
        // Charset the tailed file is written in.
        private final Charset charset;
        // Position file holding the last delivered byte offset.
        private final File pfile;
        private final ChannelProcessor channelProcessor;

        // Byte offset up to which lines have been delivered.
        private long offset;
        // Reader on the tailed file; null while the file is not open.
        private RandomAccessFile raf;
        // Written by the stopping thread, read by the runner thread.
        private volatile boolean flag = true;

        /**
         * Loads the stored offset and tries to open the tailed file at that position.
         *
         * @param filePath file to tail
         * @param posiPath file in which the offset is persisted
         * @param interval poll interval in milliseconds
         * @param charset name of the charset of the tailed file
         * @param channelProcessor receiver of the created events
         * @throws IllegalArgumentException if {@code charset} is null, malformed or unsupported
         */
        private FileRunner(String filePath, String posiPath, long interval, String charset, ChannelProcessor channelProcessor) {
            this.filePath = filePath;
            this.posiPath = posiPath;
            this.interval = interval;
            // Resolve once so a bad charset name fails at start-up, not on the first line.
            this.charset = Charset.forName(charset);
            this.channelProcessor = channelProcessor;
            this.pfile = new File(posiPath);
            this.offset = readStoredOffset();
            // A failure here is not fatal: run() retries until the file can be opened.
            openLogFile();
        }

        /**
         * Returns the offset stored in the position file, creating the file when absent.
         * Missing, empty, negative or unparsable content yields 0.
         */
        private long readStoredOffset() {
            if (!pfile.exists()) {
                try {
                    pfile.createNewFile();
                } catch (IOException e) {
                    logger.error("create position file error...", e);
                }
                return 0L;
            }
            try {
                String stored = FileUtils.readFileToString(pfile);
                // Trim so a trailing newline left by an editor does not break parsing.
                String digits = stored == null ? "" : stored.trim();
                if (!digits.isEmpty()) {
                    return Math.max(0L, Long.parseLong(digits));
                }
            } catch (IOException e) {
                logger.error("read position file error...", e);
            } catch (NumberFormatException e) {
                logger.error("position file " + posiPath + " does not contain a number, starting from 0", e);
            }
            return 0L;
        }

        /**
         * Opens the tailed file and seeks to the current offset.
         *
         * @return true if the file is open and positioned, false otherwise
         */
        private boolean openLogFile() {
            RandomAccessFile opened = null;
            try {
                opened = new RandomAccessFile(filePath, "r");
                if (offset > opened.length()) {
                    // The file is shorter than the stored offset (truncated or replaced).
                    logger.warn("stored offset " + offset + " is beyond the end of " + filePath + ", restarting from 0");
                    offset = 0L;
                }
                opened.seek(offset);
                raf = opened;
                return true;
            } catch (IOException e) {
                logger.error("open log file error: " + filePath, e);
                if (opened != null) {
                    try {
                        opened.close();
                    } catch (IOException ignored) {
                        // Nothing more can be done for a handle that failed to position.
                    }
                }
                return false;
            }
        }

        /** Closes the reader, if open, so the next loop iteration reopens it at {@code offset}. */
        private void closeReader() {
            if (raf == null) {
                return;
            }
            try {
                raf.close();
            } catch (IOException e) {
                logger.warn("close log file error: " + filePath, e);
            }
            raf = null;
        }

        /**
         * Polls the file until the flag is cleared or the thread is interrupted. Every line
         * becomes one event; after an event has been handed over, the new offset is written
         * to the position file. When there is nothing to read, or after an error, the thread
         * sleeps for the configured interval.
         */
        @Override
        public void run() {
            try {
                while (flag) {
                    try {
                        if (raf == null && !openLogFile()) {
                            Thread.sleep(interval);
                            continue;
                        }
                        String rawLine = raf.readLine();
                        if (rawLine == null) {
                            Thread.sleep(interval);
                            continue;
                        }
                        // Recover the bytes of the line and decode them with the configured
                        // charset; using readLine()'s result directly garbles non-ASCII text.
                        String line = new String(rawLine.getBytes(RAW_BYTES), charset);
                        Event event = EventBuilder.withBody(line, charset);
                        channelProcessor.processEvent(event);
                        // Advance the offset only after the event has been handed over.
                        offset = raf.getFilePointer();
                        FileUtils.writeStringToFile(pfile, Long.toString(offset));
                    } catch (IOException e) {
                        logger.error("read line error...", e);
                        closeReader();
                        Thread.sleep(interval);
                    } catch (RuntimeException e) {
                        // processEvent reports delivery failures (for example a full channel)
                        // with unchecked exceptions. Reopen at the last delivered offset and
                        // retry, instead of letting the tailing thread die.
                        logger.error("process event error, retrying from offset " + offset, e);
                        closeReader();
                        Thread.sleep(interval);
                    }
                }
            } catch (InterruptedException e) {
                logger.info("tail thread interrupted, stopping");
                Thread.currentThread().interrupt();
            } finally {
                closeReader();
            }
        }

        /**
         * Sets the run flag; passing {@code false} makes {@link #run()} leave its loop.
         *
         * @param flag false to stop the task
         */
        public void setFlag(boolean flag) {
            this.flag = flag;
        }
    }
}
2.将写好的java代码打包
3.将打包完成的jar包上传到安装flume的虚拟机上,
上传到对应flume的lib目录下
4.在flume的安装目录下新建myconf目录,并在其中创建自己的配置文件
nginx-logger.conf
#定义agent名, source、channel、sink的名称
a1.sources = r1
a1.channels = c1
a1.sinks = k1
#具体定义source
#source的类型:自定义source实现类的全类名
a1.sources.r1.type = cn.edu360.flume.source.TailFileSource
#被监听的日志文件
a1.sources.r1.filePath = /root/modules/nginx-1.14.2/logs/access.log
#偏移量文件(存在读取,不存在创建)
a1.sources.r1.posiFile = /root/modules/apache-flume-1.8.0-bin/posi.txt
#时间间隔
a1.sources.r1.interval = 2000
#编码集
a1.sources.r1.charset = UTF-8
a1.sinks.k1.type = logger
#Event数据存储在内存中
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
#Channel每个事务最多处理的Event数量
a1.channels.c1.transactionCapacity = 100
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
5.启动nginx
6.启动flume
【
bin/flume-ng agent -n a1 -c conf -f myconf/nginx-logger.conf -Dflume.root.logger=INFO,console
】
7.刷新nginx页面,查看flume是否可以准确地捕获日志信息