大数据性能调优之HBase的RowKey设计
http://www.open-open.com/lib/view/open1417612091323.html
使用HBase时用到的几个Filter:SingleColumnValueFilter、RowFilter、ValueFilter
http://www.tuicool.com/articles/2eiqMfz
SingleColumnValueFilter
过滤某列值大于多少小于多少
// Keep only rows whose pinfo:t value is greater than the lower bound AND less than the upper bound.
// NOTE(review): the bounds are passed as the bytes of a string, so the comparison is presumably
// byte-wise (lexicographic) rather than numeric — confirm the stored values have a fixed width.
List<Filter> filters = new ArrayList<Filter>();
filters.add(new SingleColumnValueFilter(
        Bytes.toBytes("pinfo"),      // column family
        Bytes.toBytes("t"),          // column qualifier
        CompareOp.GREATER,
        Bytes.toBytes("1359901")));  // value (exclusive lower bound)
filters.add(new SingleColumnValueFilter(
        Bytes.toBytes("pinfo"),
        Bytes.toBytes("t"),
        CompareOp.LESS,
        Bytes.toBytes("1389901")));  // value (exclusive upper bound)
// MUST_PASS_ALL: a row is returned only if every filter in the list accepts it.
FilterList filterList1 = new FilterList(Operator.MUST_PASS_ALL, filters);
// `sn` is declared outside this snippet (presumably the Scan being configured).
sn.setFilter(filterList1);
1、初始化Hbase
/** HBase client configuration; created and populated in the constructor. */
private Configuration conf = null;
/** Handle to the target table; stays {@code null} if opening it failed in the constructor. */
HTable table = null;
/** Name of the table this helper was constructed for. */
private String tableName;
private static final Logger LOG = LoggerFactory.getLogger(HbaseUtils.class);

/**
 * Creates the HBase configuration from the given properties source and opens the table.
 *
 * <p>The ZooKeeper quorum and client port are copied from the properties into the HBase
 * configuration. If the table cannot be opened, the failure is logged and {@link #table}
 * is left {@code null}; the constructor itself does not throw for that case.
 *
 * @param propertiesType identifies the properties source to read the ZooKeeper settings from
 * @param tableName      name of the HBase table to open
 */
public HbaseUtils(PropertiesType propertiesType, String tableName) {
    conf = HBaseConfiguration.create();
    PropertiesUtil properties = new PropertiesUtil(propertiesType.getValue());
    conf.set("hbase.zookeeper.quorum", properties.getValue("hbase.zookeeper.quorum"));
    conf.set("hbase.zookeeper.property.clientPort",
            properties.getValue("hbase.zookeeper.property.clientPort"));
    try {
        table = new HTable(conf, Bytes.toBytes(tableName));
    } catch (IOException e) {
        // Pass the exception itself so the stack trace and cause are kept in the log,
        // and name the table so the failure can be traced.
        LOG.error("Failed to open HBase table " + tableName + ": " + e.getMessage(), e);
    }
    this.tableName = tableName;
}
2、在指定的条件下,按某一字段聚合所有结果
/** * 在指定的条件下,按某一字段聚合 * @param paramMap 参数条件 * @param dimensionColumns 维度 * @param aggregateColumn 聚合字段 * @return 返回map,key 为dimensionColumns 维度相对应的数据,value 为aggregateColumn 字段对应的值 */ public Map<String, Long> aggregateBySingleColumn(Map<String, String> paramMap, String[] dimensionColumns, String aggregateColumn) { if (dimensionColumns == null || dimensionColumns.length == 0 || paramMap == null || aggregateColumn == null || aggregateColumn.equals("")) { return null; } Map<String, Long> map = null; try { FilterList filterList = new FilterList(); Scan scan = new Scan(); //添加过滤条件 for (String paramKey : paramMap.keySet()) { SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes(paramKey), Bytes.toBytes(paramKey), CompareOp.EQUAL, new SubstringComparator(paramMap.get(paramKey))); filterList.addFilter(filter); } scan.setFilter(filterList); //要展现的列 for (String column : dimensionColumns) { scan.addColumn(Bytes.toBytes(column), Bytes.toBytes(column)); } scan.addColumn(Bytes.toBytes(aggregateColumn), Bytes.toBytes(aggregateColumn)); ResultScanner results = table.getScanner(scan); //将查询结果放入map 中 map = new ConcurrentHashMap<String, Long>(); for (Result result : results) { // String dimensionKey = ""; StringBuilder dimensionKey = new StringBuilder(); //取值 String value = new String(result.getValue(Bytes.toBytes(aggregateColumn), Bytes.toBytes(aggregateColumn))); Long aggregateValue = value == null? 0 : Long.parseLong(value); //拼接Key for (String column : dimensionColumns) { dimensionKey.append("\t" + new String(result.getValue(Bytes.toBytes(column), Bytes.toBytes(column)))); } dimensionKey = dimensionKey.deleteCharAt(0); if(map.containsKey(dimensionKey)) { map.put(dimensionKey.toString(), map.get(dimensionKey.toString()) + aggregateValue); } else { map.put(dimensionKey.toString(), aggregateValue); } } } catch (Exception e) { LOG.error(e.getMessage()); } return map; }
3、调用例子:对获取到的结果进一步按照自己的维度计算加和
// Condition passed to the aggregation: filter on stat_date.
Map<String, String> paramMap = new HashMap<String, String>();
paramMap.put("stat_date", statDate);

// Fetch the aggregated data for the day
Map<String, Long> resultMap;
synchronized (this.db) {
    resultMap = this.db.aggregateBySingleColumn(paramMap, columns, "pv");
}

// Step 1: re-aggregate by (appId, appVersion, eventKey), i.e. fields 1..3 of each key.
this.finalMap.clear();
if (resultMap != null && !resultMap.isEmpty()) {
    for (Map.Entry<String, Long> entry : resultMap.entrySet()) {
        String[] arr = entry.getKey().split("\t");
        // Fields 1..3 are read below, so besides matching the column count the key must
        // have at least four fields; otherwise indexing would go out of bounds.
        if (columns == null || arr.length != columns.length || arr.length < 4) {
            continue;
        }
        String appId = arr[1];
        String appVersion = arr[2];
        String eventKey = arr[3];
        Long value = entry.getValue();
        long pv = value == null ? 0L : value;
        String groupKey = appId + "\t" + appVersion + "\t" + eventKey;
        Long soFar = this.finalMap.get(groupKey);
        this.finalMap.put(groupKey, soFar == null ? pv : soFar + pv);
    }
}

// Step 2: turn every positive total into an UPDATE statement and execute them in batches.
if (!this.finalMap.isEmpty()) {
    for (Map.Entry<String, Long> entry : this.finalMap.entrySet()) {
        String[] arr = entry.getKey().split("\t");
        if (arr.length < 3) {
            continue;
        }
        String appId = arr[0];
        String appVersion = arr[1];
        String eventKey = arr[2];
        Long pv = entry.getValue();
        if (pv > 0) {
            // NOTE(review): the SQL text is built with String.format from data values; if
            // appId/appVersion/eventKey can contain quotes this is injectable — prefer a
            // parameterized statement if stat_db offers one.
            String updateSql = String.format(UPDATE_QUERY_SQL, pv, statDate, appId, appVersion, eventKey);
            this.sqlList.add(updateSql);
            LOG.info("execute:execute mysql updateSql sql: " + updateSql);
        }
        // Flush once more than 100 statements are queued.
        if (null != this.sqlList && this.sqlList.size() > 100) {
            for (String sql : this.sqlList) {
                if (StringUtils.isNotBlank(sql)) {
                    synchronized (this.stat_db) {
                        LOG.info("execute:execute mysql updateSql sql: " + sql);
                        this.stat_db.execute(sql);
                    }
                }
            }
            this.sqlList.clear();
        }
    }
    // Flush whatever is left after the last full batch.
    if (null != this.sqlList && this.sqlList.size() > 0) {
        for (String sql : this.sqlList) {
            if (StringUtils.isNotBlank(sql)) {
                synchronized (this.stat_db) {
                    LOG.info("execute:execute mysql updateSql sql: " + sql);
                    this.stat_db.execute(sql);
                }
            }
        }
        this.sqlList.clear();
    }
}
this.finalMap.clear();