自定义的sink需要继承Flume的AbstractSink类,最好实现Configurable接口,实现了该接口以后,自定义组件中的一些参数就可以通过在配置文件中实现。
process方法定义了事件的处理逻辑。configure方法用于获取组件的自定参数。
可以配置的参数的值都通过该方法来获取。不需要配置的参数,但是又必须的参数可以以实例成员变量的方式给出。通过这种灵活的手段,可以设置一些既可以让用户配置,又有默认值的一些参数。具体方法是,在configure方法中,先获取用户自定义的值,如果该值不合法,或者不合逻辑的话,就使用程序中提供的默认值。比如SqlServerSink组件的batchSize参数,该数值既可以由用户来设置,也有默认的值。
process方法和configure方法是必须实现的。
start方法是可以重写的。在该方法中,进行sqlserver连接的初始化。比如加载驱动,获得连接,为了提高程序的运行效率,在插入数据的时候,使用批量提交数据。所以在start方法中可以将JDBC的自动提交功能关闭。
destroyConnection方法是自定义方法,该方法在程序遇到异常或者终止(SqlServerSink的stop方法被调用)的时候,用于释放本地资源。
toString方法用于输出关于自定义Sink组件的一些信息。
代码中有部分信息需要读者自己去实现,如,获得sql语句的方法,批量处理失败的时候,是否需要将此类信息以email的方式或者其他方式通知某些人。
整个过程结构清晰,不需要过多解释,如有不明白,欢迎留言。
代码如下:
package cn.ancony.flumeSink; import com.google.common.collect.Lists; import org.apache.flume.*; import org.apache.flume.conf.Configurable; import org.apache.flume.sink.AbstractSink; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.*; import java.util.List; public class SqlServerSink extends AbstractSink implements Configurable { private static final Logger LOG = LoggerFactory.getLogger(SqlServerSink.class); private static final int DEFAULT_BATCHSIZE = 500; private String url;//必须配置的字段 private String user;//必须配置的字段 private String password;//必须配置的字段 private Integer batchSize;//可选配置的字段 private Connection connection; private Statement statement; @Override public Status process() throws EventDeliveryException { Status status = Status.READY; Channel channel = getChannel(); Transaction transaction = channel.getTransaction(); List<String> actions = Lists.newArrayList(); try { transaction.begin(); // This try clause includes whatever Channel/Event operations you want to do long processedEvent = 0; for (; processedEvent < batchSize; processedEvent++) { // Receive new data Event event = channel.take(); if (event == null) { status = Status.BACKOFF; break; } else { //TODO 得到事件的header和body, //TODO 如果需要事件的header信息,按照如下的方式获取,前提是在source中配置了header的信息。 //String file = event.getHeaders().get("file"); //LOG.info("file is: " + file); //String body = new String(event.getBody()); //LOG.info("body is: " + body); //TODO 根据header和body进行处理,得到需要执行的sql语句。 //TODO 得到需要的sql语句,根据实际情况自定义 String sql = null; //sql = SqlServerUtils.getSQL(file, body); LOG.info("sql is: " + sql); if (sql == null) { continue; } //TODO 将执行的语句放入set之中。 actions.add(sql); } } if (actions.size() > 0) { for (String sql : actions) { statement.addBatch(sql); LOG.info("add sql to batch:{}", sql); } statement.executeBatch(); connection.commit(); } status = Status.READY; transaction.commit(); } catch (ChannelException e) { transaction.rollback(); LOG.error("Unable to get event from channel. Exception follows.", e); status = Status.BACKOFF; } catch (BatchUpdateException e) { transaction.rollback(); //将异常写入Jermey的报警系统 //TODO 如果批量提交失败,请提供自己的处理逻辑 //SqlServerUtils ssu = new SqlServerUtils(); //ssu.alarm(e); LOG.error("Batch update exception, is: " + e + ", it's stack trace is :"); e.printStackTrace(); } catch (Exception e) { transaction.rollback(); LOG.error("Unable to communicate with sqlserver server. Exception follows.", e); status = Status.BACKOFF; destroyConnection(); } finally { transaction.close(); } return status; } //获得数据库的连接,并且初始化statement. @Override public synchronized void start() { LOG.info("SqlServerSink start..."); try { //1.加载驱动 Class.forName("com.microsoft.sqlserver.jdbc.SQLServerDriver"); LOG.info("sqlserver driver load success."); } catch (Exception e) { e.printStackTrace(); LOG.error("Unable to load sqlserver driver using com.microsoft.sqlserver.jdbc.SQLServerDriver. exception is: " + e); throw new RuntimeException(e); } try { //2.获得连接 connection = DriverManager.getConnection(url, user, password); //TODO 3.准备statement。是否需要一些参数。 statement = connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_READ_ONLY); //关闭自动提交 connection.setAutoCommit(false); } catch (Exception e) { LOG.error("Unable to create sqlserver connection using url=" + url + ", user=" + user + ", password=" + password + ".exception is: " + e); /* Try to prevent leaking resources. */ destroyConnection(); throw new RuntimeException(e); } super.start(); LOG.info("SqlServerSink sink {} started", this.getName()); } //连接的销毁。一个是销毁statement,另外一个是销毁connection. private void destroyConnection() { if (statement != null) { try { statement.close(); } catch (SQLException e) { e.printStackTrace(); } } statement = null; if (connection != null) { LOG.debug("Destroying connection to: {}", url); try { connection.close(); } catch (SQLException e) { e.printStackTrace(); } } connection = null; } //sqlServer在停止的时候需要销毁与数据库的连接 @Override public synchronized void stop() { LOG.info("sqlserver sink {} stopping", getName()); destroyConnection(); super.stop(); LOG.info("sqlserver sink {} stopped", this.getName()); } @Override public void configure(Context context) { url = context.getString("url"); if (url == null) { throw new IllegalArgumentException("url config setting is not " + "specified for sink " + getName()); } user = context.getString("user"); if (user == null) { throw new IllegalArgumentException("user config setting is not " + "specified for sink " + getName()); } password = context.getString("password"); if (password == null) { throw new IllegalArgumentException("password config setting is not " + "specified for sink " + getName()); } //对于非必须配置的值,先读取配置文件,如果有值就读取,如果没有值,就使用默认的值。并作为一个警告。 batchSize = context.getInteger("batchSize", DEFAULT_BATCHSIZE); if (batchSize < 0) { LOG.warn(getName() + ". batchSize must be positive number. Defaulting to " + DEFAULT_BATCHSIZE); batchSize = DEFAULT_BATCHSIZE; } } @Override public String toString() { return "{ Sink type:" + getClass().getSimpleName() + ", name:" + getName() + " }"; } }