ClickHouse (Part 3): Flexibly batch-writing large volumes of data into multiple cluster tables

I. Preface

Today I had a task that required batch-writing data into multiple cluster tables, with a certain amount of data written to every node of the cluster. So I spent a few extra hours writing a flexible batch-insert utility that can be reused directly the next time a similar task comes up. It has been tested and works, though some details could be cleaner; I will polish them when I have time.

Main problem solved: batch-writing data to multiple ClickHouse nodes.

II. Implementation

1. First, initialize the ClickHouse data sources. Normally you would reach for BalancedClickhouseDataSource, but I want the code itself to control how evenly data is distributed across nodes, so I use one ClickHouseDataSource per node instead.

public class ClickhouseDataSource {

    // One ClickHouseDataSource per cluster node, built lazily with double-checked locking.
    private volatile static List<DataSource> sources = null;

    public static List<DataSource> getDataSource() {
        if (sources == null) {
            synchronized (ClickhouseDataSource.class) {
                if (sources == null) {
                    // Read the node list from config: each entry carries a "connection-url".
                    List<Map<String, Object>> collect = ConfigUtils.getConfig().getList("store.clickhouse.connect").stream()
                            .map(configValue -> (Map<String, Object>) configValue.unwrapped())
                            .collect(Collectors.toList());
                    List<String> urlList = collect.stream().map(node -> (String) node.get("connection-url")).collect(Collectors.toList());
                    Map<String, Object> unwrapped = (Map<String, Object>) ConfigUtils.getConfig().getValue("store.clickhouse.param").unwrapped();
                    ClickHouseProperties ckProperties = new ClickHouseProperties();
                    ckProperties.setMaxBlockSize(80000 * 1000);
                    ckProperties.setMaxMemoryUsage(300000000000L);
                    ckProperties.setMaxTotal(1000);
                    ckProperties.setUseServerTimeZone(false);
                    ckProperties.setUseServerTimeZoneForDates(false);
                    ckProperties.setUseTimeZone((String) unwrapped.get("zone"));
                    ckProperties.setDefaultMaxPerRoute(500);
                    ckProperties.setConnectionTimeout(1500 * 1000);
                    ckProperties.setKeepAliveTimeout(-1);
                    ckProperties.setSocketTimeout(Integer.MAX_VALUE);
                    //ckProperties.setUser((String) unwrapped.get("user"));
                    //ckProperties.setPassword((String) unwrapped.get("password"));
                    List<DataSource> result = Lists.newArrayList();
                    for (String url : urlList) {
                        result.add(new ClickHouseDataSource(url, ckProperties));
                    }
                    // Assign the fully built list last, so other threads never
                    // observe a partially populated volatile field.
                    sources = result;
                }
            }
        }
        return sources;
    }
}
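
A minimal smoke-test sketch for the pool above, assuming the config keys from the code have been loaded; nothing below is part of the original task code:

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.List;
import javax.sql.DataSource;

public class DataSourceSmokeTest {
    public static void main(String[] args) throws Exception {
        // One DataSource per node, in the order listed under "store.clickhouse.connect".
        List<DataSource> sources = ClickhouseDataSource.getDataSource();
        for (int i = 0; i < sources.size(); i++) {
            try (Connection conn = sources.get(i).getConnection();
                 Statement st = conn.createStatement();
                 ResultSet rs = st.executeQuery("SELECT 1")) {
                rs.next();
                System.out.println("node " + i + " reachable, SELECT 1 -> " + rs.getInt(1));
            }
        }
    }
}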

2. The batch insert itself. Rows are serialized to TSV and streamed to the server. ClickhouseDao queries system.columns by dbName and tableName to get each column's name and type; once the types are known, matching mock data can be generated per type.

// Streams pre-serialized TSV rows straight into the HTTP request body.
// Each element of batchRows must be one complete row terminated by '\n'.
public class BatchStringHttpEntity extends AbstractHttpEntity {

    private final List<String> batchRows;
    public static final String NULL = "\\N";   // ClickHouse TSV literal for NULL

    public BatchStringHttpEntity(List<String> batchRows) {
        this.batchRows = batchRows;
    }

    @Override
    public boolean isStreaming() {
        return false;
    }

    @Override
    public boolean isRepeatable() {
        return true;
    }

    @Override
    public long getContentLength() {
        return -1;
    }

    @Override
    public InputStream getContent() throws IOException, IllegalStateException {
        throw new UnsupportedOperationException();
    }

    @Override
    public void writeTo(OutputStream outputStream) throws IOException {
        for (String row : batchRows) {
            outputStream.write(row.getBytes(StreamUtils.UTF_8));
        }
    }
}
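
For reference, a sketch of the row format this entity expects (the values are invented for illustration): fields are tab-separated, NULL is written as \N, and every row ends with a newline:

import com.google.common.collect.Lists;
import java.util.List;

public class TsvRowDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical batch of two rows for a three-column table (Int32, String, Nullable(Float64)).
        List<String> batchRows = Lists.newArrayList(
                "10001\thello_1\t\\N\n",
                "10002\thello_2\t3.14\n");
        new BatchStringHttpEntity(batchRows).writeTo(System.out);  // prints the raw TSV body
    }
}

Next, the DAO that ties the metadata lookup, the mock data, and the streamed insert together:
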
@Slf4j
public class ClickhouseDao {

    private DataSource dataSource;
    public ClickhouseDao(DataSource dataSource){
        this.dataSource = dataSource;
    }


    private void close(ResultSet resultSet, Statement statement, Connection connection){
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (SQLException e) {
                log.warn("close ResultSet failed", e);
            }
        }
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                log.warn("close Statement failed", e);
            }
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                log.warn("close Connection failed", e);
            }
        }
    }


    public void batchInsert(String databaseName, String tableName) {
        log.info("batch insert start! db: {}, table: {}", databaseName, tableName);
        LinkedHashMap<String, String> paramType = getParamType(databaseName, tableName);
        log.info("start mocking data! db: {}, table: {}", databaseName, tableName);
        List<LinkedHashMap<String, Object>> paramList = mockData(paramType);
        log.info("end mocking data! db: {}, table: {}", databaseName, tableName);
        List<String> batch = transferBatch(paramList, paramType);
        log.info("transfer data to TSV done! db: {}, table: {}", databaseName, tableName);
        try {
            String sql = "INSERT INTO `" + databaseName + "`.`" + tableName + "`";
            basicBatchInsert(sql, batch);
            log.info("batch insert end! db: {}, table: {}", databaseName, tableName);
        } catch (SQLException e) {
            // Pass the exception as the last argument so the stack trace is logged.
            log.error("batch insert failed!", e);
            throw new RuntimeException("batch insert failed!", e);
        }
    }


    private List<LinkedHashMap<String, Object>> mockData(LinkedHashMap<String, String> paramType){
        return MockData.mockData(paramType);
    }
    private LinkedHashMap<String, String> getParamType(String databaseName, String tableName) {
        String sql = "SELECT name,type FROM system.columns where database = '"+databaseName+"' AND table = '"+ tableName+"' AND name not in ('_cw_uuid','_cw_insert_time')";
        List<CkColumn> ckColumns = queryMetaBySql(sql);
        LinkedHashMap<String, String> result = Maps.newLinkedHashMap();
        for (CkColumn property : ckColumns){
            result.put(property.getName(),property.getType());
        }
        return result;
    }


    public List<String> transferBatch(List<LinkedHashMap<String, Object>> paramList, Map<String, String> paramType) {

        List<String> batch = Lists.newArrayList();
        // Column order must match the table definition, hence the LinkedHashMap;
        // build the ordered name list once instead of once per row.
        List<String> columnNames = new ArrayList<>(paramType.keySet());
        int size = columnNames.size();
        for (LinkedHashMap<String, Object> param : paramList) {
            StringBuilder tsvSb = new StringBuilder();
            for (int j = 0; j < size; j++) {
                Object value = param.get(columnNames.get(j));
                if (value == null || StringUtils.isEmpty(value.toString())) {
                    tsvSb.append(BatchStringHttpEntity.NULL);
                } else {
                    // Values are assumed to contain no tabs or newlines; real TSV
                    // output would need to escape them.
                    tsvSb.append(value);
                }
                tsvSb.append(j < size - 1 ? '\t' : '\n');
            }
            batch.add(tsvSb.toString());
        }
        return batch;
    }

    public void basicBatchInsert(String sql, List<String> batch) throws SQLException {
        // try-with-resources avoids the NPE the old finally block would throw
        // when getConnection() itself failed and statement was still null.
        try (Connection connection = dataSource.getConnection();
             ClickHouseStatementImpl statement = (ClickHouseStatementImpl) connection.createStatement()) {
            // Posts the pre-built TSV rows as the body of the INSERT statement.
            statement.sendStream(new BatchStringHttpEntity(batch), sql);
        }
    }


    public List<CkColumn> queryMetaBySql(String sql){
        Connection conn = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        try {
            conn = dataSource.getConnection();
            stmt = conn.prepareStatement(sql);
            // Must be the no-arg executeQuery(): calling executeQuery(String) on a
            // PreparedStatement throws SQLException.
            rs = stmt.executeQuery();
            return readRows(CkColumn.class, rs, true);
        } catch (SQLException e) {
            throw new RuntimeException("query metadata failed! message: " + ExceptionUtils.getRootCauseMessage(e), e);
        } finally {
            close(rs, stmt, conn);
        }
    }


    protected <T> List readRows(Class<T> clazz, ResultSet resultSet, boolean formatJson) throws SQLException {
        ImmutableList.Builder<Object> rows = ImmutableList.builder();
        Enhancer enhancer = new Enhancer();
        if (clazz != null) {
            enhancer.setSuperclass(clazz);
            // Pass-through interceptor: cglib is only used to instantiate the bean.
            enhancer.setCallback((MethodInterceptor) (obj, method, args, proxy) -> proxy.invokeSuper(obj, args));
        }

        Map<String, FastMethod> fastMethods = null;
        Map<String, Integer> fieldNameIndexMap = new HashMap<>();
        if (clazz != null) {
            fastMethods = getClassSetFastMethodMap(clazz);
        }
        ResultSetMetaData resultSetMetaData = resultSet.getMetaData();
        for (int i = 1; i < resultSetMetaData.getColumnCount() + 1; i++) {
            String columnName = resultSetMetaData.getColumnName(i);
            if (null != clazz) {
                if (fastMethods.containsKey(columnName)) {
                    fieldNameIndexMap.put(columnName, i);
                }
            }
            else {
                fieldNameIndexMap.put(columnName, i);
            }
        }

        while (resultSet.next()) {
            if (null != clazz) {
                T t = (T) enhancer.create();
                for (String field : fieldNameIndexMap.keySet()) {
                    FastMethod fastMethod = fastMethods.get(field);
                    if (fastMethod != null) {
                        try {
                            fastMethod.invoke(t, new Object[]{resultSet.getObject(fieldNameIndexMap.get(field))});
                        } catch (InvocationTargetException e) {
                            log.error("ClickhouseBaseDao searchListBySql column:[{}] fastMethod [{}] set resultSet error {}", field, fastMethod.getName(),ExceptionUtils.getRootCause(e));
                            throw new RuntimeException(e);
                        }
                    }
                }
                rows.add(t);
            } else {
                if (formatJson) {
                    Map<String, Object> map = new HashMap<>();
                    for (String field : fieldNameIndexMap.keySet()) {
                        map.put(field, resultSet.getObject(fieldNameIndexMap.get(field)));
                    }
                    rows.add(map);
                } else {
                    List<Object> list = new ArrayList<>();
                    for (int i = 1; i <= fieldNameIndexMap.size(); i++) {
                        list.add(resultSet.getObject(i));
                    }
                    rows.add(list);
                }
            }
        }
        return rows.build();
    }


    // Cache of setter FastMethods per class. A ConcurrentHashMap with computeIfAbsent,
    // because several insert threads share this cache concurrently.
    private static final Map<String, Map<String, FastMethod>> classSetFastMethodMap = new ConcurrentHashMap<>();

    public static Map<String, FastMethod> getClassSetFastMethodMap(Class clazz) {
        return classSetFastMethodMap.computeIfAbsent(clazz.getName(), className -> {
            FastClass fastClass = FastClass.create(clazz);
            Map<String, FastMethod> setFastMethodMap = new HashMap<>();
            List<Field> fields = GenericsUtils.getAllFields(clazz);
            if (CollectionUtils.isNotEmpty(fields)) {
                for (Field field : fields) {
                    if (!field.isAnnotationPresent(Transient.class)) {
                        String fieldName = field.getName();
                        setFastMethodMap.put(fieldName, fastClass.getMethod(
                                String.format("set%s", StringUtils.capitalize(fieldName)),
                                new Class[]{field.getType()}));
                    }
                }
            }
            return setFastMethodMap;
        });
    }
}
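
For clarity, this is roughly what the metadata lookup in getParamType runs and gets back; the shard_1/demo_table names and the two columns are placeholders, not from the original post:

public class MetaLookupDemo {
    public static void main(String[] args) {
        // Hypothetical metadata query for a table with columns (id Int64, message Nullable(String)).
        String sql = "SELECT name,type FROM system.columns"
                + " where database = 'shard_1' AND table = 'demo_table'"
                + " AND name not in ('_cw_uuid','_cw_insert_time')";
        ClickhouseDao dao = new ClickhouseDao(ClickhouseDataSource.getDataSource().get(0));
        // -> [CkColumn(name=id, type=Int64), CkColumn(name=message, type=Nullable(String))]
        System.out.println(dao.queryMetaBySql(sql));
    }
}

The mock-data generator walks those name/type pairs:
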
public class MockData {

    // Generate one batch of mock rows (5,000 per call) matching the given column types.
    public static List<LinkedHashMap<String, Object>> mockData(LinkedHashMap<String, String> paramType){

        List<LinkedHashMap<String,Object>> result = Lists.newArrayList();
        for (int i = 0; i < 5000; i++) {
            LinkedHashMap<String, Object> stringObjectMap = mockOneData(paramType);
            result.add(stringObjectMap);
        }
        return result;
    }

    private static LinkedHashMap<String,Object> mockOneData(LinkedHashMap<String, String> paramType){
        LinkedHashMap<String,Object> data = Maps.newLinkedHashMap();
        for (String name : paramType.keySet()){
            String ckType =  paramType.get(name);
            CkTypeEnum anEnum = CkTypeEnum.getEnum(ckType);
            Object o = null;
            if(anEnum != null){
                o = anEnum.mockData();
            }
            data.put(name,o);
        }
        return data;
    }
}
public enum CkTypeEnum {
    Int64{
        @Override
        Object mockData() {
            // Int64 columns here hold epoch-millis timestamps, so mock a recent one.
            return randomDate();
        }
    },Int32{
        @Override
        Object mockData() {
            return (int)((Math.random()*9+1) *10000);
        }
    },String{
        @Override
        Object mockData() {
            java.lang.String s = params.get(random.nextInt(params.size()));
            s = "Date : [" + DateFormatUtils.format(new Date(),"yyyy-MM-dd HH:mm:ss") +"]_" + s + counter.getAndIncrement();
            return s;
        }
    },Float64{
        @Override
        Object mockData() {
            float min = 100f;
            float max = 100000f;
            // Use the shared Random instead of allocating a new one per call.
            return min + random.nextFloat() * (max - min);
        }
    },Boolean{
        @Override
        Object mockData() {
            return random.nextBoolean();
        }
    };
    // Note: enum constants are singletons, so this mutable flag is shared state;
    // it is informational only and not safe for concurrent per-call use.
    Boolean nullable;
    Random random = new Random();
    abstract Object mockData();
    public static CkTypeEnum getEnum(String ckType){
        String pre = "Nullable(";
        boolean nullable = false;
        if (ckType.startsWith(pre)) {
            // A "Nullable(...)" wrapper means the column IS nullable; unwrap the inner type.
            nullable = true;
            ckType = StringUtils.substring(ckType, pre.length(), ckType.length() - 1);
        }
        CkTypeEnum ckTypeEnum = Enums.getIfPresent(CkTypeEnum.class, ckType).orNull();
        if (ckTypeEnum == null) {
            return null;
        }
        ckTypeEnum.nullable = nullable;
        return ckTypeEnum;
    }

    public static List<String> params = ConfigUtils.getConfig().getStringList("param.string");
    public static AtomicLong counter = new AtomicLong();
    // A random epoch-millis timestamp within the last 24 hours (used for Int64 columns).
    private static long randomDate(){
        Date end = new Date();
        Date start = DateUtils.addDays(end, -1);
        return random(start.getTime(), end.getTime());
    }

    private static long random(long begin,long end) {
        long rtn = begin + (long) (Math.random() * (end - begin));
        if (rtn == begin || rtn == end) {
            return random(begin, end);
        }
        return rtn;
    }
}
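
A quick probe of the type mapping (the type strings below are examples): Nullable(...) wrappers are stripped before the enum lookup, and unmapped types come back as null, so those columns end up as \N in the TSV:

public class TypeMappingDemo {
    public static void main(String[] args) {
        // Hypothetical lookups against CkTypeEnum with a few ClickHouse type strings.
        System.out.println(CkTypeEnum.getEnum("Int32"));            // Int32
        System.out.println(CkTypeEnum.getEnum("Nullable(Int32)"));  // Int32, nullable flag set
        System.out.println(CkTypeEnum.getEnum("DateTime"));         // null -> column mocked as \N
    }
}

Finally, the simple holder the metadata query maps into:
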
@Data
public class CkColumn {

    private String name;
    private String type;
}
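
Putting step 2 together, a minimal single-node sketch; the database and table names are placeholders and must already exist on that node:

import javax.sql.DataSource;

public class SingleNodeDemo {
    public static void main(String[] args) {
        // Hypothetical driver for one node: each batchInsert call mocks 5,000 rows and streams them in.
        DataSource first = ClickhouseDataSource.getDataSource().get(0);
        ClickhouseDao dao = new ClickhouseDao(first);
        dao.batchInsert("shard_1", "demo_table");  // columns discovered from system.columns
    }
}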

3. Write to all nodes with a thread pool, one thread per node.

The per-thread logic:

@Slf4j
public class CkInsertData implements Runnable {

    private final CountDownLatch latch;
    private final int threadNum;   // 1-based shard/node number

    public CkInsertData(CountDownLatch latch, int threadNum) {
        this.latch = latch;
        this.threadNum = threadNum;
    }

    @Override
    public void run() {
        try {
            // Node i writes into its local database shard_i.
            String shardName = "shard_" + threadNum;
            DataSource dataSource = ClickhouseDataSource.getDataSource().get(threadNum - 1);
            String[] tables = ConfigUtils.getConfig().getString("ck.tables").split(",");
            ClickhouseDao clickhouseDao = new ClickhouseDao(dataSource);
            // 200 rounds of 5,000 mock rows per table per round.
            for (int i = 0; i < 10 * 20; i++) {
                for (String name : tables) {
                    String tableName = name.replaceAll(" ", "");
                    clickhouseDao.batchInsert(shardName, tableName);
                }
            }
        } catch (Exception e) {
            // Without a catch, exceptions in pool threads would be swallowed silently.
            log.error("insert thread {} failed", threadNum, e);
        } finally {
            latch.countDown();
        }
    }
}

The main startup logic:

public class Test {

    // One worker thread per cluster node (6 nodes here).
    private static ExecutorService es = Executors.newFixedThreadPool(6);

    public static void main(String[] args) throws InterruptedException {

        CountDownLatch latch = new CountDownLatch(6);
        for (int i = 1; i <= 6; i++) {
            es.execute(new CkInsertData(latch, i));
        }
        latch.await();
        es.shutdown();
        System.out.println("Done!");
    }
}