Hbase Filter 总结1

本文链接： https://blog.csdn.net/weixin_44996457/article/details/102779344
Hbase过滤器
HBase的filter有四种比较器：
（1）二进制比较器：如’binary:abc’，按字典排序跟’abc’进行比较
（2）二进制前缀比较器：如’binaryprefix:abc’，按字典顺序只跟’abc’比较前3个字符
（3）正则表达式比较器：如’regexstring:^ab.*yz$’，按正则表达式匹配以ab开头，以yz结尾的值（注意写成’ab*yz’时含义是“a后跟任意多个b再跟yz”）。这个比较器只能使用=、!=两个比较运算符。
（4）子串比较器：如’substring:abc123’，匹配包含子串abc123的值（大小写不敏感）。这个比较器也只能使用=、!=两个比较运算符。
一、ColumnPrefixFilter：列前缀过滤器
该Filter是按照列名的前缀来扫描单元格的，只会返回符合条件的列数据
1、echo "scan 'AMR:DWD_AMR_JB_METER-E-CURVE_201905',
LIMIT=>10,FILTER=>\"ColumnPrefixFilter('08')\""|hbase shell >a.txt
//ColumnPrefixFilter例子
/**
 * Scans the "users" table with a {@code ColumnPrefixFilter} and prints every cell whose
 * column qualifier starts with "ag".
 *
 * <p>Each cell is written to standard output as one tab-separated line:
 * row key, column family, qualifier, value, timestamp.
 *
 * @throws IOException if the table cannot be opened or the scan fails
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private static void scanFilter06() throws IOException,
        UnsupportedEncodingException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
    conf.set("hbase.zookeeper.quorum", "ncst");

    // Match all columns whose qualifier starts with "ag". The charset is explicit so the
    // prefix bytes do not depend on the platform default encoding.
    ColumnPrefixFilter columnPrefixFilter = new ColumnPrefixFilter("ag".getBytes("UTF-8"));

    Scan scan = new Scan();
    scan.setFilter(columnPrefixFilter);

    // try-with-resources closes the scanner first and then the table, even if the scan throws.
    try (HTable ht = new HTable(conf, "users");
            ResultScanner rs = ht.getScanner(scan)) {
        for (Result result : rs) {
            for (Cell cell : result.rawCells()) {
                // Decode every component as UTF-8 (the original only did so for the value).
                System.out.println(new String(CellUtil.cloneRow(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneFamily(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneQualifier(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneValue(cell), "UTF-8") + "\t"
                        + cell.getTimestamp());
            }
        }
    }
}
                                                                                                       
二、TimestampsFilter：时间戳过滤器
说明：该过滤器允许针对返回给客户端的时间版本进行更细粒度的控制，使用的时候，可以提供一个返回的时间戳的列表，只有与时间戳匹配的单元才可以返回。当做多行扫描或者是单行检索时，如果需要一个时间区间，可以在Get或Scan对象上使用setTimeRange()方法来实现这一点。
1、echo "scan  'AMR:DWD_AMR_JB_METER-E-CURVE_201905',
FILTER=>\"TimestampsFilter(1448069941270,1548069941230)\""| hbase shell >a.txt
/**
 * Scans the "users" table from row "xiaoming" with a {@code TimestampsFilter} built from
 * three fixed timestamps and prints every cell the scan returns.
 *
 * <p>Each cell is written to standard output as one tab-separated line:
 * row key, column family, qualifier, value, timestamp.
 *
 * @throws IOException if the table cannot be opened or the scan fails
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private static void scanFilter10() throws IOException,
        UnsupportedEncodingException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
    conf.set("hbase.zookeeper.quorum", "ncst");

    // The timestamps handed to the filter.
    List<Long> timestamps = new ArrayList<Long>();
    timestamps.add(1479788961691L);
    timestamps.add(1479788676517L);
    timestamps.add(1479788812565L);
    Filter filter = new TimestampsFilter(timestamps);

    Scan scan = new Scan();
    // Explicit charset so the start-row bytes do not depend on the platform default.
    scan.setStartRow("xiaoming".getBytes("UTF-8"));
    scan.setFilter(filter);

    // try-with-resources closes the scanner first and then the table, even if the scan throws.
    try (HTable ht = new HTable(conf, "users");
            ResultScanner rs = ht.getScanner(scan)) {
        for (Result result : rs) {
            for (Cell cell : result.rawCells()) {
                // Decode every component as UTF-8 (the original only did so for the value).
                System.out.println(new String(CellUtil.cloneRow(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneFamily(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneQualifier(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneValue(cell), "UTF-8") + "\t"
                        + cell.getTimestamp());
            }
        }
    }
}

                                                                                                     
三、PageFilter：分页过滤器（Shell中同样可以使用，例如：scan 'users',{FILTER=>"PageFilter(3)"}）
//PageFilter例子
/**
 * Scans the "users" table from row "xiaoming" with a {@code PageFilter} of page size 3 and
 * prints every cell of the rows returned.
 *
 * <p>Each cell is written to standard output as one tab-separated line:
 * row key, column family, qualifier, value, timestamp.
 *
 * @throws IOException if the table cannot be opened or the scan fails
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private static void scanFilter10() throws IOException,
        UnsupportedEncodingException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
    conf.set("hbase.zookeeper.quorum", "ncst");

    // Start at row key "xiaoming" and ask for a page of 3 rows.
    // (The original comment named "lisi", which did not match the start row used below.)
    // NOTE(review): the HBase docs describe the page size as enforced per region server,
    // so a multi-region scan may yield more than 3 rows -- confirm for your cluster.
    PageFilter pageFilter = new PageFilter(3L);

    Scan scan = new Scan();
    // Explicit charset so the start-row bytes do not depend on the platform default.
    scan.setStartRow("xiaoming".getBytes("UTF-8"));
    scan.setFilter(pageFilter);

    // try-with-resources closes the scanner first and then the table, even if the scan throws.
    try (HTable ht = new HTable(conf, "users");
            ResultScanner rs = ht.getScanner(scan)) {
        for (Result result : rs) {
            for (Cell cell : result.rawCells()) {
                // Decode every component as UTF-8 (the original only did so for the value).
                System.out.println(new String(CellUtil.cloneRow(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneFamily(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneQualifier(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneValue(cell), "UTF-8") + "\t"
                        + cell.getTimestamp());
            }
        }
    }
}
                                                                                                            
四、MultipleColumnPrefixFilter：复合列前缀过滤器
//匹配以'a'或者'c'开头的所有列（Java API中多个前缀以二维字节数组传入）
1、	scan 'users',{FILTER=>"MultipleColumnPrefixFilter('a','c')"}
//MultipleColumnPrefixFilter例子
/**
 * Scans the "users" table with a {@code MultipleColumnPrefixFilter} and prints every cell
 * whose column qualifier starts with "a" or "c".
 *
 * <p>Each cell is written to standard output as one tab-separated line:
 * row key, column family, qualifier, value, timestamp.
 *
 * @throws IOException if the table cannot be opened or the scan fails
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private static void scanFilter07() throws IOException,
        UnsupportedEncodingException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
    conf.set("hbase.zookeeper.quorum", "ncst");

    // The prefixes are passed as an array of byte arrays: match qualifiers starting with
    // "a" or "c". The charset is explicit so the bytes do not depend on the platform default.
    byte[][] prefixes = new byte[][] {"a".getBytes("UTF-8"), "c".getBytes("UTF-8")};
    MultipleColumnPrefixFilter multipleColumnPrefixFilter = new MultipleColumnPrefixFilter(prefixes);

    Scan scan = new Scan();
    scan.setFilter(multipleColumnPrefixFilter);

    // try-with-resources closes the scanner first and then the table, even if the scan throws.
    try (HTable ht = new HTable(conf, "users");
            ResultScanner rs = ht.getScanner(scan)) {
        for (Result result : rs) {
            for (Cell cell : result.rawCells()) {
                // Decode every component as UTF-8 (the original only did so for the value).
                System.out.println(new String(CellUtil.cloneRow(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneFamily(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneQualifier(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneValue(cell), "UTF-8") + "\t"
                        + cell.getTimestamp());
            }
        }
    }
}
五、FamilyFilter：列族过滤器
1、scan 'users',{FILTER=>"FamilyFilter(=,'binaryprefix:add')"}
 //基于列族过滤数据的FamilyFilter
/**
 * Scans the "users" table with a {@code FamilyFilter} and prints every cell whose column
 * family name starts with "add".
 *
 * <p>Each cell is written to standard output as one tab-separated line:
 * row key, column family, qualifier, value, timestamp.
 *
 * @throws IOException if the table cannot be opened or the scan fails
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private static void scanFilter04() throws IOException,
        UnsupportedEncodingException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
    conf.set("hbase.zookeeper.quorum", "ncst");

    // Alternative: keep only the column family that is exactly 'address'.
    //FamilyFilter familyFilter = new FamilyFilter(CompareOp.EQUAL, new BinaryComparator("address".getBytes()));

    // Keep the column families whose name starts with 'add'. The charset is explicit so the
    // prefix bytes do not depend on the platform default.
    FamilyFilter familyFilter = new FamilyFilter(CompareOp.EQUAL,
            new BinaryPrefixComparator("add".getBytes("UTF-8")));

    Scan scan = new Scan();
    scan.setFilter(familyFilter);

    // try-with-resources closes the scanner first and then the table, even if the scan throws.
    try (HTable ht = new HTable(conf, "users");
            ResultScanner rs = ht.getScanner(scan)) {
        for (Result result : rs) {
            for (Cell cell : result.rawCells()) {
                // Decode every component as UTF-8 (the original only did so for the value).
                System.out.println(new String(CellUtil.cloneRow(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneFamily(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneQualifier(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneValue(cell), "UTF-8") + "\t"
                        + cell.getTimestamp());
            }
        }
    }
}
六、ColumnPaginationFilter：列分页过滤器
说明：构造方法为ColumnPaginationFilter(limit, offset)，对每一行从第offset列开始，最多返回limit列。
/**
 * Scans the "course" column family of the given table with a
 * {@code ColumnPaginationFilter(2, 1)} and prints the row key, qualifier and value of
 * every cell returned.
 *
 * @param tableName name of the table to scan
 * @throws Exception if the table cannot be obtained or the scan fails
 */
public void ColumnPaginationFilter(String tableName) throws Exception {
	Scan scan = new Scan();
	// NOTE(review): per the HBase API the arguments are (limit, offset), i.e. at most
	// 2 columns per row starting from column offset 1 -- confirm against your HBase version.
	Filter filter = new ColumnPaginationFilter(2,1);
	scan.setFilter(filter);
	// After addFamily, only data from the specified column family is returned.
	scan.addFamily(Bytes.toBytes("course"));

	// try-with-resources closes the scanner first and then the table, even if the scan
	// throws; the original never closed either. The shared connection is left open.
	try (Table table = conn.getTable(TableName.valueOf(tableName));
			ResultScanner scanner = table.getScanner(scan)) {
		for (Result r : scanner) {
			for (Cell cell : r.rawCells()) {
				// NOTE(review): the label reads "Familiy:Quilifier" but only the qualifier
				// is printed; the text is kept byte-identical to preserve existing output.
				System.out.println(
						"Rowkey-->"+Bytes.toString(r.getRow())+"  "+
						"Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+"  "+
						"Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
			}
		}
	}
}

                                                                                                   
七、SingleColumnValueFilter
说明：根据列的值来决定这一行数据是否返回，落脚点在行，而不是列。我们可以设置filter.setFilterIfMissing(true);如果为true，当这一列不存在时，不会返回，如果为false，当这一列不存在时，会返回所有的列信息。
1、scan 'users',{FILTER=>"SingleColumnValueFilter('info','age',=,'substring:4')"}
/**
 * Scans the "users" table with a {@code SingleColumnValueFilter} on column info:age using
 * a {@code SubstringComparator("4")} and prints every cell of the rows returned.
 *
 * <p>Each cell is written to standard output as one tab-separated line:
 * row key, column family, qualifier, value, timestamp.
 *
 * @throws IOException if the table cannot be opened or the scan fails
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private static void scanFilter03() throws IOException,
        UnsupportedEncodingException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
    conf.set("hbase.zookeeper.quorum", "ncst");

    // SubstringComparator checks whether a substring occurs in the value (case-insensitive).
    // Here: keep the rows whose info:age value contains '4'.
    SubstringComparator comparator = new SubstringComparator("4");
    // The fourth argument is a comparator here rather than a plain value.
    // Family and qualifier bytes use an explicit charset instead of the platform default.
    SingleColumnValueFilter scvf = new SingleColumnValueFilter(
            "info".getBytes("UTF-8"), "age".getBytes("UTF-8"), CompareOp.EQUAL, comparator);

    Scan scan = new Scan();
    scan.setFilter(scvf);

    // try-with-resources closes the scanner first and then the table, even if the scan throws.
    try (HTable ht = new HTable(conf, "users");
            ResultScanner rs = ht.getScanner(scan)) {
        for (Result result : rs) {
            for (Cell cell : result.rawCells()) {
                // Decode every component as UTF-8 (the original only did so for the value).
                System.out.println(new String(CellUtil.cloneRow(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneFamily(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneQualifier(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneValue(cell), "UTF-8") + "\t"
                        + cell.getTimestamp());
            }
        }
    }
}
                                                                                               
八、RowFilter：行键过滤器
说明：筛选出匹配的所有的行，支持基于行键过滤数据，可以执行精确匹配，子字符串匹配或正则表达式匹配，过滤掉不匹配的数据。
1、scan 'testByCrq', FILTER=>"RowFilter(=,'substring:111')"
如上命令所示，查询的是表名为testByCrq，过滤方式是通过rowkey过滤，匹配出rowkey含111的数据。
2、echo "scan 'AMR:DWD_AMR_JB_METER-E-CURVE_201905',
LIMIT=>10,FILTER=>\"RowFilter(=,'regexstring:A*B')\""|hbase shell >a.txt
//RowFilter例子
/**
 * Scans the "users" table with a {@code RowFilter} using a
 * {@code SubstringComparator("CD")} and prints every cell of the rows whose key
 * contains "CD".
 *
 * <p>Each cell is written to standard output as one tab-separated line:
 * row key, column family, qualifier, value, timestamp.
 *
 * @throws IOException if the table cannot be opened or the scan fails
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private static void scanFilter09() throws IOException,
        UnsupportedEncodingException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
    conf.set("hbase.zookeeper.quorum", "ncst");

    // Match all rows whose row key contains 'CD'.
    RowFilter rowFilter = new RowFilter(CompareOp.EQUAL, new SubstringComparator("CD"));

    Scan scan = new Scan();
    scan.setFilter(rowFilter);

    // try-with-resources closes the scanner first and then the table, even if the scan throws.
    try (HTable ht = new HTable(conf, "users");
            ResultScanner rs = ht.getScanner(scan)) {
        for (Result result : rs) {
            for (Cell cell : result.rawCells()) {
                // Decode every component as UTF-8 (the original only did so for the value).
                System.out.println(new String(CellUtil.cloneRow(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneFamily(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneQualifier(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneValue(cell), "UTF-8") + "\t"
                        + cell.getTimestamp());
            }
        }
    }
}
                                                                                                                
九、QualifierFilter：列过滤器
说明：该Filter是一种类似RowFilter的比较过滤器，不同之处是它用来匹配列限定符而不是行键，对列的名称进行过滤，而不是列的值。
1、echo "scan 'AMR:DWD_AMR_JB_METER-E-CURVE_201905',
LIMIT=>10,FILTER=>\"QualifierFilter(=,'regexstring:name')\""|hbase shell >a.txt
//基于Qualifier(列名)过滤数据的QualifierFilter
/**
 * Scans the "users" table with a {@code QualifierFilter} using a
 * {@code RegexStringComparator(".ge")} and prints every cell whose column qualifier
 * matches that regular expression.
 *
 * <p>Each cell is written to standard output as one tab-separated line:
 * row key, column family, qualifier, value, timestamp.
 *
 * @throws IOException if the table cannot be opened or the scan fails
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private static void scanFilter05() throws IOException,
        UnsupportedEncodingException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
    conf.set("hbase.zookeeper.quorum", "ncst");

    // Alternative: column name exactly equal to 'age'.
    //QualifierFilter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, new BinaryComparator("age".getBytes()));

    // Alternative: column name starting with 'age' (including 'age' itself).
    //QualifierFilter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, new BinaryPrefixComparator("age".getBytes()));

    // Alternative: column name containing 'age' (including 'age' itself).
    //QualifierFilter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, new SubstringComparator("age"));

    // Active variant: column name matching the regular expression '.ge'.
    QualifierFilter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator(".ge"));

    Scan scan = new Scan();
    scan.setFilter(qualifierFilter);

    // try-with-resources closes the scanner first and then the table, even if the scan throws.
    try (HTable ht = new HTable(conf, "users");
            ResultScanner rs = ht.getScanner(scan)) {
        for (Result result : rs) {
            for (Cell cell : result.rawCells()) {
                // Decode every component as UTF-8 (the original only did so for the value).
                System.out.println(new String(CellUtil.cloneRow(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneFamily(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneQualifier(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneValue(cell), "UTF-8") + "\t"
                        + cell.getTimestamp());
            }
        }
    }
}
                                                                                                          
十、ColumnRangeFilter
//匹配 以'a'开头到以'c'开头(不包含c) 所有的列  
1、scan 'users',{FILTER=>"ColumnRangeFilter('a',true,'c',false)"}
//ColumnRangeFilter例子
/**
 * Scans the "users" table with a {@code ColumnRangeFilter} and prints every cell whose
 * column qualifier lies in the range from "a" (inclusive) to "c" (exclusive).
 *
 * <p>Each cell is written to standard output as one tab-separated line:
 * row key, column family, qualifier, value, timestamp.
 *
 * @throws IOException if the table cannot be opened or the scan fails
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private static void scanFilter08() throws IOException,
        UnsupportedEncodingException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
    conf.set("hbase.zookeeper.quorum", "ncst");

    // Match columns from those starting with 'a' up to, but not including, 'c'.
    // The charset is explicit so the boundary bytes do not depend on the platform default.
    ColumnRangeFilter columnRangeFilter = new ColumnRangeFilter(
            "a".getBytes("UTF-8"), true, "c".getBytes("UTF-8"), false);

    Scan scan = new Scan();
    scan.setFilter(columnRangeFilter);

    // try-with-resources closes the scanner first and then the table, even if the scan throws.
    try (HTable ht = new HTable(conf, "users");
            ResultScanner rs = ht.getScanner(scan)) {
        for (Result result : rs) {
            for (Cell cell : result.rawCells()) {
                // Decode every component as UTF-8 (the original only did so for the value).
                System.out.println(new String(CellUtil.cloneRow(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneFamily(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneQualifier(cell), "UTF-8") + "\t"
                        + new String(CellUtil.cloneValue(cell), "UTF-8") + "\t"
                        + cell.getTimestamp());
            }
        }
    }
}
猜你喜欢