一、前言
今天有一个批量写入多张集群表数据的任务,要求集群每个节点都要写入一定数据。于是我多花了几个小时,写了一份灵活的批量写入代码,以后再有类似任务可以直接拿来用。经测试功能是没问题的,但细节可能写得不好,有空再改。
主要解决问题:Clickhouse批量写入多个节点数据。
二、开始
1、首先是Clickhouse的数据源初始化。正常使用是考虑使用BalancedClickhouseDataSource,但是我想代码控制写入数据量的均匀,所以使用的ClickHouseDataSource。
/**
 * Lazily builds one {@link DataSource} per ClickHouse node listed under
 * {@code store.clickhouse.connect}, using double-checked locking.
 *
 * <p>FIX: the original assigned {@code sources = Lists.newArrayList()} and then
 * filled it inside the synchronized block. Because {@code sources} is volatile,
 * a racing reader could observe the list non-null but only partially populated.
 * We now build a local list and publish it with a single assignment.
 */
public class ClickhouseDataSource {

    // Volatile so the fully-constructed list is safely published across threads.
    private static volatile List<DataSource> sources = null;

    public static List<DataSource> getDataSource() {
        if (sources == null) {
            synchronized (ClickhouseDataSource.class) {
                if (sources == null) {
                    // One entry per cluster node; order matches the config list.
                    List<Map<String, Object>> nodes = ConfigUtils.getConfig()
                            .getList("store.clickhouse.connect").stream()
                            .map(configValue -> (Map<String, Object>) configValue.unwrapped())
                            .collect(Collectors.toList());
                    List<String> urlList = nodes.stream()
                            .map(node -> (String) node.get("connection-url"))
                            .collect(Collectors.toList());
                    Map<String, Object> params = (Map<String, Object>) ConfigUtils.getConfig()
                            .getValue("store.clickhouse.param").unwrapped();

                    ClickHouseProperties ckProperties = new ClickHouseProperties();
                    // Generous limits: this tool intentionally pushes very large batches.
                    ckProperties.setMaxBlockSize(80000 * 1000);
                    ckProperties.setMaxMemoryUsage(300000000000L);
                    ckProperties.setMaxTotal(1000);
                    // Use the configured client timezone instead of the server's.
                    ckProperties.setUseServerTimeZone(false);
                    ckProperties.setUseServerTimeZoneForDates(false);
                    ckProperties.setUseTimeZone((String) params.get("zone"));
                    ckProperties.setDefaultMaxPerRoute(500);
                    ckProperties.setConnectionTimeout(1500 * 1000);
                    ckProperties.setKeepAliveTimeout(-1);
                    ckProperties.setSocketTimeout(Integer.MAX_VALUE);
                    // Credentials can be enabled from config when the cluster requires auth:
                    // ckProperties.setUser((String) params.get("user"));
                    // ckProperties.setPassword((String) params.get("password"));

                    // Build locally, publish once fully populated (see class javadoc).
                    List<DataSource> built = Lists.newArrayList();
                    for (String url : urlList) {
                        built.add(new ClickHouseDataSource(url, ckProperties));
                    }
                    sources = built;
                }
            }
        }
        return sources;
    }
}
2、ck 批量写入。将数据转成TSV后写入。ClickhouseDao中通过dbName和tableName查询system.columns来获得列的名称和类型。即知道类型那么即可根据类型来模拟数据
/**
 * HTTP entity that streams a batch of pre-rendered TSV rows directly to the
 * request body, one {@code write} per row, encoded as UTF-8.
 *
 * <p>Marked repeatable (the backing list can be re-walked) and non-streaming,
 * but content is only available via {@link #writeTo(OutputStream)} —
 * {@link #getContent()} is intentionally unsupported.
 */
public class BatchStringHttpEntity extends AbstractHttpEntity {

    /** ClickHouse TSV escape sequence for NULL values. */
    public static final String NULL = "\\N";

    // Immutable reference; rows are written in list order.
    private final List<String> batchRows;

    /**
     * @param batchRows rows already formatted as TSV lines (each ending in '\n'); must not be null
     */
    public BatchStringHttpEntity(List<String> batchRows) {
        this.batchRows = Objects.requireNonNull(batchRows, "batchRows");
    }

    @Override
    public boolean isStreaming() {
        return false;
    }

    @Override
    public boolean isRepeatable() {
        // Safe to resend: writeTo only re-reads the in-memory list.
        return true;
    }

    @Override
    public long getContentLength() {
        // Unknown up front; the client will use chunked transfer encoding.
        return -1;
    }

    @Override
    public InputStream getContent() throws IOException, IllegalStateException {
        // Content is push-only via writeTo; no pull-style stream is offered.
        throw new UnsupportedOperationException();
    }

    @Override
    public void writeTo(OutputStream outputStream) throws IOException {
        for (String row : batchRows) {
            outputStream.write(row.getBytes(StreamUtils.UTF_8));
        }
    }
}
@Slf4j
public class ClickhouseDao {
private DataSource dataSource;
public ClickhouseDao(DataSource dataSource){
this.dataSource = dataSource;
}
private void close(ResultSet resultSet, Statement statement, Connection connection){
if(resultSet != null){
try {
resultSet.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if(statement != null){
try {
statement.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if(connection != null){
try {
connection.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
public void batchInsert(String databaseName,String tableName) {
log.info("batch Insert start ! db : "+databaseName+",table : " +tableName);
LinkedHashMap<String, String> paramType = getParamType(databaseName, tableName);
log.info("start mock data ! db : "+databaseName+",table : " +tableName);
List<LinkedHashMap<String, Object>> paramList = mockData(paramType);
log.info("end mock data ! db : "+databaseName+",table : " +tableName);
List<String> batch = transferBatch(paramList,paramType);
log.info("transfer data to TSV end ! db : "+databaseName+",table : " +tableName);
try {
String sql = "INSERT INTO `"+ databaseName + "`.`" + tableName + "`";
basicBatchInsert(sql,batch);
log.info("batch Insert end ! db : "+databaseName+",table : " +tableName);
} catch (SQLException e) {
log.error("batchInsert Data Failed ! exception is {}",e);
throw new RuntimeException("batchInsert Data Failed !");
}
}
private List<LinkedHashMap<String, Object>> mockData(LinkedHashMap<String, String> paramType){
return MockData.mockData(paramType);
}
private LinkedHashMap<String, String> getParamType(String databaseName, String tableName) {
String sql = "SELECT name,type FROM system.columns where database = '"+databaseName+"' AND table = '"+ tableName+"' AND name not in ('_cw_uuid','_cw_insert_time')";
List<CkColumn> ckColumns = queryMetaBySql(sql);
LinkedHashMap<String, String> result = Maps.newLinkedHashMap();
for (CkColumn property : ckColumns){
result.put(property.getName(),property.getType());
}
return result;
}
public List<String> transferBatch(List<LinkedHashMap<String, Object>> paramList,Map<String, String> paramType) {
List<String> batch = Lists.newArrayList();
Map<String,String> nameAndType = paramType;
Object value = null;
for (int i = 0; i < paramList.size(); i++) {
LinkedHashMap<String, Object> param = paramList.get(i);
StringBuilder tsvSb = new StringBuilder();
List<String> strings = new ArrayList(nameAndType.keySet());
int size = strings.size();
for (int j =0 ; j < size ; j++) {
String columnName = strings.get(j);
value = param.containsKey(columnName) ? param.get(columnName) : null;
if (value == null || StringUtils.isEmpty(value.toString())) {
tsvSb.append(BatchStringHttpEntity.NULL);
} else {
tsvSb.append(value.toString());
}
tsvSb.append(j < nameAndType.keySet().size()-1 ? '\t' : '\n');
}
batch.add(tsvSb.toString());
}
return batch;
}
public void basicBatchInsert(String sql, List<String> batch) throws SQLException {
Connection connection = null;
ClickHouseStatementImpl statement = null;
try {
connection = dataSource.getConnection();
statement = (ClickHouseStatementImpl)connection.createStatement();
statement.sendStream(new BatchStringHttpEntity(batch),sql);
}finally {
statement.close();
connection.close();
}
}
public List<CkColumn> queryMetaBySql(String sql){
Connection conn = null;
PreparedStatement stmt = null;
ResultSet rs = null;
try {
conn = dataSource.getConnection();
stmt = conn.prepareStatement(sql);
rs = stmt.executeQuery(sql);
ResultSetMetaData metaData = rs.getMetaData();
List<CkColumn> list = readRows(CkColumn.class, rs, true);
return list;
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException("Get sql connect failed ! message :"+ExceptionUtils.getRootCauseMessage(e));
}finally {
close(rs,stmt,conn);
}
}
protected <T> List readRows(Class<T> clazz, ResultSet resultSet, boolean formatJson) throws SQLException {
ImmutableList.Builder rows = ImmutableList.builder();
Enhancer enhancer = new Enhancer();
if (clazz != null) {
enhancer.setSuperclass(clazz);
enhancer.setCallback(new MethodInterceptor() {
@Override
public Object intercept(Object obj, Method method, Object[] args, MethodProxy proxy) throws Throwable {
return proxy.invokeSuper(obj, args);
}
});
}
Map<String, FastMethod> fastMethods = null;
Map<String, Integer> fieldNameIndexMap = new HashMap<>();
if (null != clazz) {
fastMethods = getClassSetFastMethodMap(clazz);
}
ResultSetMetaData resultSetMetaData = resultSet.getMetaData();
for (int i = 1; i < resultSetMetaData.getColumnCount() + 1; i++) {
String columnName = resultSetMetaData.getColumnName(i);
if (null != clazz) {
if (fastMethods.containsKey(columnName)) {
fieldNameIndexMap.put(columnName, i);
}
}
else {
fieldNameIndexMap.put(columnName, i);
}
}
while (resultSet.next()) {
if (null != clazz) {
T t = (T) enhancer.create();
for (String field : fieldNameIndexMap.keySet()) {
FastMethod fastMethod = fastMethods.get(field);
if (fastMethod != null) {
try {
fastMethod.invoke(t, new Object[]{resultSet.getObject(fieldNameIndexMap.get(field))});
} catch (InvocationTargetException e) {
log.error("ClickhouseBaseDao searchListBySql column:[{}] fastMethod [{}] set resultSet error {}", field, fastMethod.getName(),ExceptionUtils.getRootCause(e));
throw new RuntimeException(e);
}
}
}
rows.add(t);
} else {
if (formatJson) {
Map<String, Object> map = new HashMap<>();
for (String field : fieldNameIndexMap.keySet()) {
map.put(field, resultSet.getObject(fieldNameIndexMap.get(field)));
}
rows.add(map);
} else {
List<Object> list = new ArrayList<>();
for (int i = 1; i <= fieldNameIndexMap.size(); i++) {
list.add(resultSet.getObject(i));
}
rows.add(list);
}
}
}
return rows.build();
}
private static Map<String, Map<String, FastMethod>> classSetFastMethodMap = new HashMap<String, Map<String, FastMethod>>();
public static Map<String, FastMethod> getClassSetFastMethodMap(Class clazz) {
String className = clazz.getName();
FastClass fastClass = FastClass.create(clazz);
Map<String, FastMethod> setFastMethodMap;
if (classSetFastMethodMap.containsKey(className)) {
setFastMethodMap = classSetFastMethodMap.get(className);
} else {
setFastMethodMap = new HashMap<String, FastMethod>();
List<Field> fields = GenericsUtils.getAllFields(clazz);
if (CollectionUtils.isNotEmpty(fields)) {
for (int i = 0, length = fields.size(); i < length; i++) {
java.lang.reflect.Field field = fields.get(i);
if (!field.isAnnotationPresent(Transient.class)) {
String fieldName = field.getName();
setFastMethodMap.put(fieldName, fastClass.getMethod(String.format("set%s", StringUtils.capitalize(fieldName)),
new Class[]{field.getType()}));
}
}
}
classSetFastMethodMap.put(className, setFastMethodMap);
}
return setFastMethodMap;
}
}
/**
 * Generates batches of fake rows whose shape follows a column-name → ClickHouse-type
 * map (LinkedHashMap so row iteration order matches the table's column order).
 */
public class MockData {

    /** Default rows per batch, matching the original hard-coded value. */
    private static final int DEFAULT_BATCH_SIZE = 5000;

    /** Backward-compatible entry point: one batch of {@value #DEFAULT_BATCH_SIZE} rows. */
    public static List<LinkedHashMap<String, Object>> mockData(LinkedHashMap<String, String> paramType) {
        return mockData(paramType, DEFAULT_BATCH_SIZE);
    }

    /**
     * Generalized overload: the batch size is now a parameter.
     *
     * @param paramType column name → ClickHouse type, in column order
     * @param count     number of rows to generate (0 yields an empty list)
     */
    public static List<LinkedHashMap<String, Object>> mockData(LinkedHashMap<String, String> paramType, int count) {
        List<LinkedHashMap<String, Object>> result = Lists.newArrayList();
        for (int i = 0; i < count; i++) {
            result.add(mockOneData(paramType));
        }
        return result;
    }

    /** Builds one row; unknown types map to null (rendered as \N downstream). */
    private static LinkedHashMap<String, Object> mockOneData(LinkedHashMap<String, String> paramType) {
        LinkedHashMap<String, Object> data = Maps.newLinkedHashMap();
        for (Map.Entry<String, String> entry : paramType.entrySet()) {
            CkTypeEnum anEnum = CkTypeEnum.getEnum(entry.getValue());
            data.put(entry.getKey(), anEnum != null ? anEnum.mockData() : null);
        }
        return data;
    }
}
/**
 * ClickHouse column types this tool knows how to mock. {@link #getEnum(String)}
 * unwraps {@code Nullable(...)} wrappers and records nullability on the constant.
 *
 * <p>NOTE(review): {@code nullable} is mutable state on a SHARED enum constant —
 * concurrent callers with mixed Nullable/plain columns of the same base type can
 * race on it. Callers here only read it per-call, but a value-object return would
 * be safer; flagged rather than redesigned to keep the interface stable.
 */
public enum CkTypeEnum {
    Int64 {
        @Override
        Object mockData() {
            // Int64 columns are used as epoch-millis timestamps in this dataset.
            return randomDate();
        }
    }, Int32 {
        @Override
        Object mockData() {
            // Uniform 5-digit value in [10000, 100000).
            return (int) ((Math.random() * 9 + 1) * 10000);
        }
    }, String {
        @Override
        Object mockData() {
            // Random configured phrase, prefixed with a timestamp and suffixed
            // with a global counter so every generated string is unique.
            java.lang.String s = params.get(random.nextInt(params.size()));
            s = "Date : [" + DateFormatUtils.format(new Date(), "yyyy-MM-dd HH:mm:ss") + "]_" + s + counter.getAndIncrement();
            return s;
        }
    }, Float64 {
        @Override
        Object mockData() {
            float min = 100f;
            float max = 100000f;
            // FIX: reuse the per-constant Random instead of allocating one per call.
            return min + random.nextFloat() * (max - min);
        }
    }, Boolean {
        @Override
        Object mockData() {
            return random.nextBoolean();
        }
    };

    // Whether the last-parsed column type was wrapped in Nullable(...).
    Boolean nullable;
    Random random = new Random();

    /** Produces one mock value appropriate for this ClickHouse type. */
    abstract Object mockData();

    /**
     * Resolves a ClickHouse type string (optionally {@code Nullable(...)}-wrapped)
     * to its enum constant, or null for unsupported types.
     * FIX: the nullable flag was inverted — Nullable(...) columns were marked
     * NOT nullable and plain columns nullable.
     */
    public static CkTypeEnum getEnum(String ckType) {
        String pre = "Nullable(";
        boolean nullable = false;
        if (ckType.startsWith(pre)) {
            nullable = true;
            // Strip "Nullable(" prefix and trailing ")".
            ckType = StringUtils.substring(ckType, pre.length(), ckType.length() - 1);
        }
        CkTypeEnum ckTypeEnum = Enums.getIfPresent(CkTypeEnum.class, ckType).orNull();
        if (ckTypeEnum == null) {
            return null;
        }
        ckTypeEnum.nullable = nullable;
        return ckTypeEnum;
    }

    /** Source phrases for String mocking, loaded from configuration once. */
    public static List<String> params = ConfigUtils.getConfig().getStringList("param.string");
    /** Global uniqueness counter appended to every mocked string. */
    public static AtomicLong counter = new AtomicLong();

    /** Random epoch-millis within the last 24 hours; 0 on any failure. */
    private static long randomDate() {
        try {
            Date end = new Date();
            Date start = DateUtils.addDays(end, -1);
            if (start.getTime() >= end.getTime()) {
                return 0L;
            }
            long date = random(start.getTime(), end.getTime());
            return new Date(date).getTime();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return 0L;
    }

    /**
     * Random value strictly between begin and end (exclusive of both).
     * FIX: guard the degenerate range — when {@code end - begin <= 1} no such
     * value exists and the original recursed until StackOverflowError.
     */
    private static long random(long begin, long end) {
        if (end - begin <= 1) {
            return begin;
        }
        long rtn = begin + (long) (Math.random() * (end - begin));
        if (rtn == begin || rtn == end) {
            return random(begin, end);
        }
        return rtn;
    }
}
@Data
public class CkColumn {
    // Column name as reported by system.columns.name
    private String name;
    // ClickHouse type string as reported by system.columns.type, e.g. "Nullable(Int64)"
    private String type;
}
3、向多个节点写入,使用线程池,每个节点对应一个线程写入。
线程内部逻辑
@Slf4j
public class CkInsertData extends Thread {
private CountDownLatch latch;
private int threadNum;
public CkInsertData(CountDownLatch latch, int threadNum) {
this.latch = latch;
this.threadNum = threadNum;
}
@Override
public void run() {
try {
String shardName = "shard_" + threadNum;
DataSource dataSource = ClickhouseDataSource.getDataSource().get(threadNum-1);
String[] split = ConfigUtils.getConfig().getString("ck.tables").split(",");
ClickhouseDao clickhouseDao = new ClickhouseDao(dataSource);
for (int i = 0; i < 10 * 20; i++) {
for (String name : split) {
String dataBasesName = shardName;
String tableName = name.replaceAll(" ", "");
clickhouseDao.batchInsert(dataBasesName, tableName);
}
}
}finally {
latch.countDown();
}
}
}
主逻辑,启动逻辑
ublic class Test {
private static ExecutorService es = Executors.newFixedThreadPool(6);
public static void main(String[] args) throws InterruptedException {
CountDownLatch latch = new CountDownLatch(6);
for (int i = 1; i <= 6; i++) {
es.execute(new CkInsertData(latch,i));
}
latch.await();
System.out.println("完成!");
}
}