HBase 2.0.2 之 Scala API

import java.util
import java.util.{ArrayList, HashMap, List, Map}
import org.apache.hadoop.hbase.{Cell, CompareOperator, HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.filter._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkConf
import scala.collection.JavaConversions._
object HbaseScala {

  /**
    * Demo entry point.
    *
    * Configures the HBase client, opens a connection, deletes row "rowKey5"
    * from table "blog_scala" and then prints the whole table. The other
    * operations of this object are left as commented-out calls; the sample
    * values built here are their arguments, so a call can simply be
    * uncommented to try it.
    *
    * The connection is closed when the demo finishes, also when a step fails.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // Hadoop home directory on the developer machine (Windows path).
    System.setProperty("hadoop.home.dir", "D:\\hadoop-common-bin-2.7.x")
    // Not used by any of the HBase calls below.
    val sparkConf = new SparkConf().setAppName("HBaseTest")
    val conf = HBaseConfiguration.create
    val tableName: TableName = TableName.valueOf("blog_scala")
    conf.set("hbase.zookeeper.quorum", "hadoop01,hadoop02,hadoop03")
    conf.set("hbase.zookeeper.property.clientPort", "2181")

    // Builds one cell description with the four keys that
    // insertMany(connection, tableName, list) reads from each map.
    def sampleCell(rowKey: String, family: String, column: String, value: String): Map[String, AnyRef] = {
      val cell: Map[String, AnyRef] = new HashMap[String, AnyRef]
      cell.put("rowKey", rowKey)
      cell.put("columnFamily", family)
      cell.put("columnName", column)
      cell.put("columnValue", value)
      cell
    }

    // The Connection is the entry point for every HBase operation.
    val connection = ConnectionFactory.createConnection(conf)
    try {
      // Create a table with a single column family.
      // createHTable(connection, tableName, "area")

      // Create a table with several column families.
      val columnFamilys: Array[String] = Array("article", "author")
      // createHTable(connection, tableName, columnFamilys)

      // Sample cells for the list-based bulk insert.
      val listMap: List[Map[String, AnyRef]] = new ArrayList[Map[String, AnyRef]]
      listMap.add(sampleCell("ce_shi1", "article", "title", "Head First HBase"))
      listMap.add(sampleCell("ce_shi1", "article", "content", "HBase is the Hadoop database"))
      listMap.add(sampleCell("ce_shi1", "article", "tag", "Hadoop,HBase,NoSQL"))
      listMap.add(sampleCell("ce_shi1", "author", "name", "nicholas"))
      listMap.add(sampleCell("ce_shi1", "author", "nickname", "lee"))
      listMap.add(sampleCell("ce_shi2", "author", "name", "spark"))
      listMap.add(sampleCell("ce_shi2", "author", "nickname", "hadoop"))
      // insertMany(connection, tableName, listMap)
      // insertMany(connection, tableName)

      // Insert a single row.
      // insertSingle(connection, tableName)

      // Change a value addressed by row key, column family and column name.
      val rowKey = "ce_shi2"
      val columnFamily = "author"
      val columnName = "name"
      val columnValue = "hbase"
      // updateData(connection, tableName, rowKey, columnFamily, columnName, columnValue)

      val rowKey1 = "rowKey5"
      val columnFamily1 = "author"
      val columnName1 = "tag"
      val columnNames = new util.ArrayList[String]
      columnNames.add("name")
      columnNames.add("nickname")
      // Delete one column of one column family of a row.
      // deleteData(connection, tableName, rowKey1, columnFamily1, columnName1)
      // Delete one column family of a row.
      // deleteData(connection, tableName, rowKey1, columnFamily1)
      // Delete several columns of one column family of a row.
      // deleteData(connection, tableName, rowKey1, columnFamily1, columnNames)
      // Delete a whole row.
      deleteData(connection, tableName, rowKey1)

      // Read one row by row key.
      // getResult(connection, tableName, "rowKey5")

      // Full table scan.
      scanTable(connection, tableName)

      // Row-key filter.
      // rowkeyFilter(connection, tableName)

      // Column-value filter.
      // singColumnFilter(connection, tableName)

      // Column-name prefix filter.
      // columnPrefixFilter(connection, tableName)

      // Combination of filters.
      // filterSet(connection, tableName)
    } finally {
      // The original never closed the connection.
      connection.close()
    }
  }

  /**
    * Creates a table with a single column family, unless the table already exists.
    *
    * Prints "create done." after creating the table, or "表已存在" when a table
    * with that name is already present. The Admin handle obtained from the
    * connection is always closed before returning.
    *
    * @param connection   open HBase connection used to obtain the Admin
    * @param tableName    name of the table to create
    * @param columnFamily name of the only column family of the new table
    */
  def createHTable(connection: Connection,tableName: TableName,columnFamily:String): Unit=
  {
    val admin = connection.getAdmin
    try {
      if (admin.tableExists(tableName)) {
        print("表已存在")
      } else {
        // Column family descriptor -> table descriptor -> create.
        val family: ColumnFamilyDescriptor =
          ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily)).build
        val descriptor = TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(family).build
        admin.createTable(descriptor)
        println("create done.")
      }
    } finally {
      // The original never released the Admin handle.
      admin.close()
    }
  }

  /**
    * Creates a table with several column families, unless the table already exists.
    *
    * Prints "create done." after creating the table; does nothing when the
    * table is already present. The Admin handle obtained from the connection
    * is always closed before returning.
    *
    * @param connection    open HBase connection used to obtain the Admin
    * @param tableName     name of the table to create
    * @param columnFamilys names of the column families of the new table
    */
  def createHTable(connection: Connection,tableName: TableName, columnFamilys: Array[String]): Unit ={
    val admin = connection.getAdmin
    try {
      if (!admin.tableExists(tableName)) {
        val tableBuilder = TableDescriptorBuilder.newBuilder(tableName)
        // One descriptor per requested column family.
        columnFamilys.foreach { family =>
          val descriptor: ColumnFamilyDescriptor =
            ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family)).build
          tableBuilder.setColumnFamily(descriptor)
        }
        admin.createTable(tableBuilder.build)
        println("create done.")
      }
    } finally {
      // The original never released the Admin handle.
      admin.close()
    }
  }

  /**
    * Writes one hard-coded sample row ("rowKey5") with two cells in the
    * "author" family and three cells in the "article" family.
    *
    * The table handle is closed even when the write fails.
    *
    * @param connection open HBase connection
    * @param tableName  table to write to
    */
  def insertSingle(connection: Connection,tableName: TableName): Unit = {
    // (column family, column name, value)
    val cells = Seq(
      ("author", "name", "hbase"),
      ("author", "nickname", "hadoop"),
      ("article", "tag", "sqoop"),
      ("article", "content", "flume"),
      ("article", "title", "hive")
    )
    val put = new Put(Bytes.toBytes("rowKey5"))
    cells.foreach { case (family, qualifier, value) =>
      put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value))
    }
    val table = connection.getTable(tableName)
    try {
      table.put(put)
    } finally {
      table.close()
    }
  }
  /**
    * Writes many cells in one batch; suited to data with a fixed structure.
    *
    * Every map in `list` describes one cell and must contain the keys
    * "rowKey", "columnFamily", "columnName" and "columnValue"; each value is
    * written as its `toString`. A null or empty `list` results in an empty
    * batch. Prints "add data Success!" when the batch has been written.
    *
    * The puts are built before the table is opened and the table is closed in
    * a `finally`, so no handle is leaked when a map is malformed or the write
    * fails.
    *
    * @param connection open HBase connection
    * @param tableName  table to write to
    * @param list       cell descriptions as described above
    * @throws IllegalArgumentException if a map lacks one of the four keys
    *                                  (the original failed with a bare NullPointerException)
    */
  def insertMany(connection: Connection,tableName: TableName, list: List[Map[String, AnyRef]]): Unit = {
    import scala.collection.JavaConverters._

    // Reads a mandatory entry of a cell description as bytes.
    def required(cell: Map[String, AnyRef], key: String): Array[Byte] = {
      val value = cell.get(key)
      if (value == null) {
        throw new IllegalArgumentException(s"missing '$key' in cell description: $cell")
      }
      Bytes.toBytes(value.toString)
    }

    val puts: ArrayList[Put] = new ArrayList[Put]
    if (list != null) {
      for (cell <- list.asScala) {
        val put: Put = new Put(required(cell, "rowKey"))
        put.addColumn(required(cell, "columnFamily"), required(cell, "columnName"), required(cell, "columnValue"))
        puts.add(put)
      }
    }

    val table: Table = connection.getTable(tableName)
    try {
      table.put(puts)
    } finally {
      table.close()
    }
    System.out.println("add data Success!")
  }

  /**
    * Writes four hard-coded sample rows ("rowKey1" to "rowKey4"), one cell
    * each, in a single batch.
    *
    * The table handle is closed even when the write fails.
    *
    * @param connection open HBase connection
    * @param tableName  table to write to
    */
  def insertMany(connection: Connection,tableName: TableName): Unit = {
    // (row key, column family, column name, value)
    val rows = Seq(
      ("rowKey1", "author", "nickname", "bigData"),
      ("rowKey2", "author", "name", "spark"),
      ("rowKey3", "article", "title", "HBase,Hive"),
      ("rowKey4", "article", "content", "HBase")
    )
    val puts = new ArrayList[Put]
    rows.foreach { case (rowKey, family, qualifier, value) =>
      val put = new Put(Bytes.toBytes(rowKey))
      put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value))
      puts.add(put)
    }
    val table = connection.getTable(tableName)
    try {
      table.put(puts)
    } finally {
      table.close()
    }
  }

  /**
    * Sets the value of one cell, addressed by row key, column family and
    * column name, by issuing a Put for it.
    *
    * The table handle is closed even when the write fails.
    *
    * @param connection   open HBase connection
    * @param tableName    table to write to
    * @param rowKey       row key of the cell
    * @param columnFamily column family of the cell
    * @param columnName   column name (qualifier) of the cell
    * @param columnValue  new value
    */
  def updateData(connection: Connection,tableName: TableName, rowKey: String, columnFamily: String, columnName: String, columnValue: String): Unit = {
    val put = new Put(Bytes.toBytes(rowKey))
    put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName), Bytes.toBytes(columnValue))
    val table = connection.getTable(tableName)
    try {
      table.put(put)
    } finally {
      table.close()
    }
  }

  /**
    * Deletes a whole row.
    *
    * The table handle is closed even when the delete fails.
    *
    * @param connection open HBase connection
    * @param tableName  table to delete from
    * @param rowKey     key of the row to delete
    */
  def deleteData(connection: Connection,tableName: TableName, rowKey: String): Unit = {
    // A Delete that names no family or column covers the entire row.
    val delete = new Delete(Bytes.toBytes(rowKey))
    val table = connection.getTable(tableName)
    try {
      table.delete(delete)
    } finally {
      table.close()
    }
  }

  /**
    * Deletes one column family of a row.
    *
    * The table handle is closed even when the delete fails.
    *
    * @param connection   open HBase connection
    * @param tableName    table to delete from
    * @param rowKey       key of the affected row
    * @param columnFamily column family whose content is removed from the row
    */
  def deleteData(connection: Connection,tableName: TableName, rowKey: String, columnFamily: String): Unit = {
    val delete = new Delete(Bytes.toBytes(rowKey))
    delete.addFamily(Bytes.toBytes(columnFamily))
    val table = connection.getTable(tableName)
    try {
      table.delete(delete)
    } finally {
      table.close()
    }
  }

  /**
    * Deletes one column of one column family of a row.
    *
    * Uses `Delete.addColumn`, which targets only the most recent version of
    * the cell (the list-based overload of this method uses `addColumns`,
    * which targets all versions).
    *
    * The table handle is closed even when the delete fails.
    *
    * @param connection   open HBase connection
    * @param tableName    table to delete from
    * @param rowKey       key of the affected row
    * @param columnFamily column family of the column
    * @param columnName   column name (qualifier) to delete
    */
  def deleteData(connection: Connection,tableName: TableName, rowKey: String, columnFamily: String, columnName: String): Unit = {
    val delete = new Delete(Bytes.toBytes(rowKey))
    delete.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName))
    val table = connection.getTable(tableName)
    try {
      table.delete(delete)
    } finally {
      table.close()
    }
  }

  /**
    * Deletes several columns of one column family of a row.
    *
    * Uses `Delete.addColumns`, i.e. all versions of each listed column.
    *
    * A null or empty `columnNames` is a no-op. Without this guard the Delete
    * would name no column at all, and such a Delete removes the entire row.
    *
    * The table handle is closed even when the delete fails.
    *
    * @param connection   open HBase connection
    * @param tableName    table to delete from
    * @param rowKey       key of the affected row
    * @param columnFamily column family of the columns
    * @param columnNames  column names (qualifiers) to delete
    */
  def deleteData(connection: Connection,tableName: TableName, rowKey: String, columnFamily: String, columnNames: util.List[String]): Unit = {
    import scala.collection.JavaConverters._
    if (columnNames != null && !columnNames.isEmpty) {
      val family = Bytes.toBytes(columnFamily)
      val delete = new Delete(Bytes.toBytes(rowKey))
      columnNames.asScala.foreach { columnName =>
        delete.addColumns(family, Bytes.toBytes(columnName))
      }
      val table = connection.getTable(tableName)
      try {
        table.delete(delete)
      } finally {
        table.close()
      }
    }
  }
  /**
    * Reads one row by row key and prints every cell as "column::value".
    *
    * Prints nothing when the row does not exist: `Result.rawCells` is null in
    * that case, which made the original loop fail with a NullPointerException.
    * The table handle is closed even when the read fails.
    *
    * @param connection open HBase connection
    * @param tableName  table to read from
    * @param rowKey     key of the row to read
    */
  def getResult(connection: Connection,tableName: TableName, rowKey: String): Unit = {
    val table = connection.getTable(tableName)
    try {
      val result = table.get(new Get(Bytes.toBytes(rowKey)))
      // rawCells is null, not an empty array, for an empty result.
      val cells = Option(result.rawCells).getOrElse(Array.empty[Cell])
      cells.foreach { cell =>
        val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
        val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
        System.out.println(qualifier + "::" + value)
      }
    } finally {
      table.close()
    }
  }

  /**
    * Scans the whole table and prints every row: a "row key :" line, one
    * "family::column::value" line per cell, then a separator line.
    *
    * The scanner and the table handle are closed when the scan ends or fails;
    * the original left both open.
    *
    * @param connection open HBase connection
    * @param tableName  table to scan
    */
  def scanTable(connection: Connection,tableName: TableName): Unit = {
    import scala.collection.JavaConverters._
    val table = connection.getTable(tableName)
    try {
      val scanner = table.getScanner(new Scan)
      try {
        for (row <- scanner.asScala) {
          System.out.println("row key :" + Bytes.toString(row.getRow))
          row.rawCells.foreach { cell =>
            val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
            val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
            val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
            System.out.println(family + "::" + qualifier + "::" + value)
          }
          System.out.println("-----------------------------------------")
        }
      } finally {
        scanner.close()
      }
    } finally {
      table.close()
    }
  }


  // Filter comparison operators: LESS <, LESS_OR_EQUAL <=, EQUAL =, NOT_EQUAL <>, GREATER_OR_EQUAL >=, GREATER >, NO_OP excludes everything
  /**
    * Scans the table with a row-key filter and prints the matching rows:
    * a "row key :" line, one "family::column::value" line per cell, then a
    * separator line.
    *
    * The filter keeps rows whose key matches the regular expression "Key1$".
    * The scanner and the table handle are closed when the scan ends or fails;
    * the original left both open.
    *
    * @param connection open HBase connection
    * @param tableName  table to scan
    */
  def rowkeyFilter(connection: Connection,tableName: TableName): Unit = {
    import scala.collection.JavaConverters._
    val scan = new Scan
    // "str$" matches at the end of the key (like SQL LIKE '%str');
    // "^str" matches at the start of the key (like SQL LIKE 'str%').
    scan.setFilter(new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("Key1$")))
    val table = connection.getTable(tableName)
    try {
      val scanner = table.getScanner(scan)
      try {
        for (row <- scanner.asScala) {
          System.out.println("row key :" + Bytes.toString(row.getRow))
          row.rawCells.foreach { cell =>
            val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
            val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
            val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
            System.out.println(family + "::" + qualifier + "::" + value)
          }
          System.out.println("-----------------------------------------")
        }
      } finally {
        scanner.close()
      }
    } finally {
      table.close()
    }
  }

  /**
    * Scans the table with a column-value filter and prints the matching rows:
    * a "row key :" line, one "family::column::value" line per cell, then a
    * separator line.
    *
    * The filter compares the value of column "author:name" with "hbase" using
    * LESS. NOTE(review): by default SingleColumnValueFilter also emits rows
    * that lack the column; call `setFilterIfMissing(true)` if those rows
    * should be dropped.
    *
    * The scanner and the table handle are closed when the scan ends or fails;
    * the original left both open.
    *
    * @param connection open HBase connection
    * @param tableName  table to scan
    */
  def singColumnFilter(connection: Connection,tableName: TableName): Unit = {
    import scala.collection.JavaConverters._
    val scan = new Scan
    // Arguments: column family, column name, comparison operator, value.
    val filter = new SingleColumnValueFilter(Bytes.toBytes("author"), Bytes.toBytes("name"), CompareOperator.LESS, Bytes.toBytes("hbase"))
    scan.setFilter(filter)
    val table = connection.getTable(tableName)
    try {
      val scanner = table.getScanner(scan)
      try {
        for (row <- scanner.asScala) {
          System.out.println("row key :" + Bytes.toString(row.getRow))
          row.rawCells.foreach { cell =>
            val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
            val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
            val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
            System.out.println(family + "::" + qualifier + "::" + value)
          }
          System.out.println("-----------------------------------------")
        }
      } finally {
        scanner.close()
      }
    } finally {
      table.close()
    }
  }

  /**
    * Scans the table with a column-name prefix filter and prints the matching
    * rows: a "row key :" line, one "family::column::value" line per cell, then
    * a separator line.
    *
    * The filter keeps cells whose column name starts with "name".
    * The scanner and the table handle are closed when the scan ends or fails;
    * the original left both open.
    *
    * @param connection open HBase connection
    * @param tableName  table to scan
    */
  def columnPrefixFilter(connection: Connection,tableName: TableName): Unit = {
    import scala.collection.JavaConverters._
    val scan = new Scan
    scan.setFilter(new ColumnPrefixFilter(Bytes.toBytes("name")))
    val table = connection.getTable(tableName)
    try {
      val scanner = table.getScanner(scan)
      try {
        for (row <- scanner.asScala) {
          System.out.println("row key :" + Bytes.toString(row.getRow))
          row.rawCells.foreach { cell =>
            val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
            val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
            val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
            System.out.println(family + "::" + qualifier + "::" + value)
          }
          System.out.println("-----------------------------------------")
        }
      } finally {
        scanner.close()
      }
    } finally {
      table.close()
    }
  }

  /**
    * Scans the table with a combination of filters and prints the matching
    * rows: a "row key :" line, one "family::column::value" line per cell, then
    * a separator line.
    *
    * Two filters are combined with MUST_PASS_ALL: a value filter on
    * "author:name" EQUAL "spark" and a column-name prefix filter for "name".
    * The scanner and the table handle are closed when the scan ends or fails;
    * the original left both open.
    *
    * @param connection open HBase connection
    * @param tableName  table to scan
    */
  def filterSet(connection: Connection,tableName: TableName): Unit = {
    import scala.collection.JavaConverters._
    val filters = new FilterList(FilterList.Operator.MUST_PASS_ALL)
    filters.addFilter(new SingleColumnValueFilter(Bytes.toBytes("author"), Bytes.toBytes("name"), CompareOperator.EQUAL, Bytes.toBytes("spark")))
    filters.addFilter(new ColumnPrefixFilter(Bytes.toBytes("name")))
    val scan = new Scan
    scan.setFilter(filters)
    val table = connection.getTable(tableName)
    try {
      val scanner = table.getScanner(scan)
      try {
        for (row <- scanner.asScala) {
          System.out.println("row key :" + Bytes.toString(row.getRow))
          row.rawCells.foreach { cell =>
            val family = Bytes.toString(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
            val qualifier = Bytes.toString(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
            val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
            System.out.println(family + "::" + qualifier + "::" + value)
          }
          System.out.println("-----------------------------------------")
        }
      } finally {
        scanner.close()
      }
    } finally {
      table.close()
    }
  }
}

猜你喜欢

转载自blog.csdn.net/u012893747/article/details/84934160