Using MapReduce to Operate on HBase

1 Environment Setup

1) Set up a working hadoop-2.6.0-cdh5.10.0 and hbase-1.2.0-cdh5.10.0 development environment.
2) Start HDFS, YARN, ZooKeeper, the HBase master, and the region server, then verify them through the HBase shell:

sbin/start-dfs.sh
sbin/start-yarn.sh
bin/hbase-daemon.sh start zookeeper
bin/hbase-daemon.sh start master
bin/hbase-daemon.sh start regionserver
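
For example, once the daemons are up you can open the HBase shell and run the built-in status and list commands as a quick check that the cluster is reachable:

bin/hbase shell
status
list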

3) Use Eclipse as the development tool and add the Hadoop and HBase libraries to the project as dependencies.

2 Create the Project

We use the HBase Java API for this exercise.
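
As a quick sanity check that the project's HBase dependencies and the ZooKeeper address are wired up correctly, a minimal sketch like the one below can be run first. It only lists the existing tables; the class name SmokeTest is a placeholder, and the quorum address matches the cluster used in the jobs later in this article.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class SmokeTest {
    public static void main(String[] args) throws Exception {
        // Same ZooKeeper quorum as the MapReduce jobs below
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.159.130");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        // Listing the tables is enough to prove the client can reach HBase
        HBaseAdmin admin = new HBaseAdmin(conf);
        for (HTableDescriptor table : admin.listTables()) {
            System.out.println(table.getNameAsString());
        }
        admin.close();
    }
}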

3 Create the HDFS Directory and Upload the Data to HBase

(1) The contents of the dataset data.txt are as follows:

public static class extends private new
private final static void
key value context context
throws exception 
value string line value value string
split
string string new string line Value 
while 
string value
string word value string
set word value 
context write public static class
word extends  text text private
new void key value context context
throws int sum for get
output set sum context write key output value

(2) Create the directory and upload the data:

bin/hadoop fs -mkdir -p /user/root/hbase/input/
bin/hadoop fs -put /home/chenbo/data/hadoop/data.txt /user/root/hbase/input/
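
To confirm the upload, you can list the directory and print the file with the standard hadoop fs commands:

bin/hadoop fs -ls /user/root/hbase/input/
bin/hadoop fs -cat /user/root/hbase/input/data.txt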

(3) The code is as follows:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 
public class WordCountUpLoadToHBase extends Configured {

    // Mapper: tokenize each line of the input file and emit (word, 1)
    public static class WCHBaseMapper extends Mapper<Object, Text, ImmutableBytesWritable, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer strs = new StringTokenizer(value.toString());
            while (strs.hasMoreTokens()) {
                word.set(strs.nextToken());
                context.write(new ImmutableBytesWritable(Bytes.toBytes(word.toString())), one);
            }
        }
    }

    // Reducer: sum the counts for each word and store them in the HBase table as a Put
    public static class WCHBaseReducer extends TableReducer<ImmutableBytesWritable, IntWritable, ImmutableBytesWritable> {

        public void reduce(ImmutableBytesWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            // Row key = the word; column content:count holds the total as a string
            Put put = new Put(key.get());
            put.add(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes.toBytes(sum + ""));
            context.write(key, put);
        }
    }
    @SuppressWarnings("all")
    public static void main(String[] args) throws MasterNotRunningException, ZooKeeperConnectionException, IOException, ClassNotFoundException, InterruptedException {
        String tableName = "wordcount";
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.159.130");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        // Recreate the target table: drop it if it exists, then create it with a single "content" column family
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (admin.tableExists(tableName)) {
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
        }
        HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
        HColumnDescriptor columnDescriptor = new HColumnDescriptor("content");
        tableDescriptor.addFamily(columnDescriptor);
        admin.createTable(tableDescriptor);

        // Configure the job: a plain Mapper reading from HDFS and a TableReducer writing into the "wordcount" table
        Job job = new Job(conf, "upload to hbase");
        job.setJarByClass(WordCountUpLoadToHBase.class);
        job.setMapperClass(WCHBaseMapper.class);
        TableMapReduceUtil.initTableReducerJob(tableName, WCHBaseReducer.class, job, null, null, null, null, false);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        FileInputFormat.addInputPaths(job, "hdfs://192.168.159.130:8020/user/root/hbase/input/data.txt");
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}


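After packaging WordCountUpLoadToHBase into a JAR and running it on the cluster, the word counts can be checked in the HBase shell. The JAR name wc-hbase.jar below is only a placeholder for whatever name you export from Eclipse:

bin/hadoop jar wc-hbase.jar WordCountUpLoadToHBase
bin/hbase shell
scan 'wordcount'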

4 Read the Data from HBase and Upload the Results to HDFS

4.1 Create the MRReadFromHbase class

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MRReadFromHbase extends Configured {

    // Mapper: read each row of the "wordcount" table and emit (word, count)
    public static class WCHBaseMapper extends TableMapper<Text, Text> {

        @Override
        public void map(ImmutableBytesWritable key, Result values, Context context) throws IOException, InterruptedException {
            StringBuffer sb = new StringBuffer("");
            // Concatenate all cell values of the "content" column family for this row
            for (Map.Entry<byte[], byte[]> value : values.getFamilyMap("content".getBytes()).entrySet()) {
                String str = new String(value.getValue());
                if (str != null) {
                    sb.append(str);
                }
            }
            // Emit once per row: the row key (the word) and its accumulated count
            context.write(new Text(key.get()), new Text(sb.toString()));
        }
    }

    // Reducer: pass the (word, count) pairs straight through to the HDFS output
    public static class WCHBaseReducer extends Reducer<Text, Text, Text, Text> {

        private Text result = new Text();

        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            for (Text val : values) {
                result.set(val);
                context.write(key, result);
            }
        }
    }


    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String tableName = "wordcount";
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.159.130");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        // Configure the job: a TableMapper reading from the "wordcount" table and a plain Reducer writing to HDFS
        Job job = new Job(conf, "read from hbase to hdfs");
        job.setJarByClass(MRReadFromHbase.class);
        job.setReducerClass(WCHBaseReducer.class);
        TableMapReduceUtil.initTableMapperJob(tableName, new Scan(), WCHBaseMapper.class, Text.class, Text.class, job);
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.159.130:8020/user/root/hbase/output/"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

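When the second job finishes, the results should be in the HDFS output directory. The part file name below follows the default MapReduce naming and may differ on your cluster:

bin/hadoop fs -ls /user/root/hbase/output/
bin/hadoop fs -cat /user/root/hbase/output/part-r-00000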

Reposted from blog.csdn.net/weixin_44322234/article/details/106209261