HBase: Loading HDFS Data into an HBase Table

Sample data (fields: name, sex, age, phone, email, qq):
zhangfenglun,M,20,13522334455,[email protected],23521472
chenfei,M,20,13684634455,[email protected],84545472
liyuchen,M,20,13522334255,[email protected],84765472
liuwei,M,20,13528734455,[email protected],84521765
liuyang,M,20,13522354455,[email protected],84231472
caofei,M,20,13735675455,[email protected],84527642
zhaoxinkuan,M,20,13522334466,[email protected],84512472
gaoying,M,20,13454523455,[email protected],845212344
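
Each line maps to one HBase row: the name becomes the row key, sex and age go into the basicinfo column family, and phone, email and qq go into the extrainfo family. As a minimal standalone sketch of that mapping (outside MapReduce; the class name SingleRowPut is just a placeholder, and the table name and ZooKeeper hosts mirror the job below), a single line could be written with the plain HBase client like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class SingleRowPut {
	public static void main(String[] args) throws Exception {
		// One input line: name,sex,age,phone,email,qq
		String line = "zhangfenglun,M,20,13522334455,[email protected],23521472";
		String[] f = line.split(",");

		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", "hadoop02:2181,hadoop03:2181,hadoop01:2181");

		try (Connection conn = ConnectionFactory.createConnection(conf);
				Table table = conn.getTable(TableName.valueOf("mingxing"))) {
			Put p = new Put(Bytes.toBytes(f[0])); // row key = name
			p.addColumn(Bytes.toBytes("basicinfo"), Bytes.toBytes("sex"), Bytes.toBytes(f[1]));
			p.addColumn(Bytes.toBytes("basicinfo"), Bytes.toBytes("age"), Bytes.toBytes(f[2]));
			p.addColumn(Bytes.toBytes("extrainfo"), Bytes.toBytes("phone"), Bytes.toBytes(f[3]));
			p.addColumn(Bytes.toBytes("extrainfo"), Bytes.toBytes("email"), Bytes.toBytes(f[4]));
			p.addColumn(Bytes.toBytes("extrainfo"), Bytes.toBytes("qq"), Bytes.toBytes(f[5]));
			table.put(p);
		}
	}
}

The MapReduce job below does the same thing in bulk: the mapper keys each record by name, and the reducer turns each record into a Put.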

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;



public class hdfsToHbase {
	static class MyMapper extends Mapper<LongWritable, Text, Text, Text>{
		Text mk=new Text();
		Text mv=new Text();
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// Take one line of input and split it on commas
			// zhangfenglun,M,20,13522334455,[email protected],23521472
			// name,sex,age,phone,email,qq
			String[] split = value.toString().split(",");
			// map output key: the name, which later becomes the HBase row key
			mk.set(split[0]);
			// map output value: everything after the name, e.g. "M,20,13522334455,[email protected],23521472"
			mv.set(value.toString().substring(split[0].length() + 1));
			context.write(mk, mv);
		}
	}
	static class MyReducer extends TableReducer<Text, Text, NullWritable>{
		@Override
		protected void reduce(Text key, Iterable<Text> values, 
				Reducer<Text, Text, NullWritable, Mutation>.Context context)
				throws IOException, InterruptedException {
			
			// The row key comes from the map key (the name); a Put must be created with a row key
			Put p = new Put(key.toString().getBytes());
			// Iterate over the values and pack the fields into the Put object
			// value fields: sex,age,phone,email,qq (the name was stripped off in the mapper)
			for (Text v : values) {
				String[] data = v.toString().split(",");
				// e.g. M,20,13522334455,[email protected],23521472
				// add column family, qualifier and value to the Put
				p.addColumn("basicinfo".getBytes(), "sex".getBytes(), data[0].getBytes());
				p.addColumn("basicinfo".getBytes(), "age".getBytes(), data[1].getBytes());
				p.addColumn("extrainfo".getBytes(), "phone".getBytes(), data[2].getBytes());
				p.addColumn("extrainfo".getBytes(), "email".getBytes(), data[3].getBytes());
				p.addColumn("extrainfo".getBytes(), "qq".getBytes(), data[4].getBytes());
			}
			context.write(NullWritable.get(), p);
		}
	}
	

	public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
		// Run as the hadoop user so the job has permission to read and write HDFS
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		Configuration conf = new Configuration();
		// ZooKeeper quorum used by the HBase client
		conf.set("hbase.zookeeper.quorum", "hadoop02:2181,hadoop03:2181,hadoop01:2181");
		// Create the MapReduce job
		Job job=Job.getInstance(conf);
		
		// Set the jar to run by the driver class
		job.setJarByClass(hdfsToHbase.class);
		
		// Set the mapper class; the reducer is wired up below by TableMapReduceUtil
		job.setMapperClass(MyMapper.class);
		// job.setReducerClass(MyReducer.class); // not needed: initTableReducerJob registers MyReducer
		
		// Map output key/value types
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		// Connect to HBase
		Connection connect = ConnectionFactory.createConnection(conf);
		// Get an Admin instance for table management
		Admin admin = connect.getAdmin();
		// Create the target table if it does not exist yet
		String tablename = "mingxing";
		if (!admin.tableExists(TableName.valueOf(tablename))) { // tableExists returns true if the table already exists
			HTableDescriptor htd=new HTableDescriptor(TableName.valueOf(tablename));
			HColumnDescriptor hcd1=new HColumnDescriptor("basicinfo".getBytes());
			HColumnDescriptor hcd2=new HColumnDescriptor("extrainfo".getBytes());
			htd.addFamily(hcd1);
			htd.addFamily(hcd2);
			admin.createTable(htd);
		}
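		// initTableReducerJob wires MyReducer into the job and configures TableOutputFormat
		// so the Puts emitted by the reducer are written to the 'mingxing' table;
		// the trailing nulls keep the default partitioner and local cluster settings,
		// and false skips shipping dependency jars with the job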
		TableMapReduceUtil.initTableReducerJob(tablename, MyReducer.class, job,null,null,null,null,false);
		// Input path in HDFS
		FileInputFormat.addInputPath(job, new Path("hdfs://bd1804/mingxing.txt"));
		// Output path
		FileSystem fs = FileSystem.get(conf);
		// Put the cluster's core-site.xml and hdfs-site.xml under src so they are picked up from the classpath
		// The file output below is not strictly necessary since the data goes to HBase;
		// in my test the output directory stayed empty, so this part could be removed
		// Delete the output directory if it already exists
		Path out=new Path("hdfs://bd1804/stuout01");
		if(fs.exists(out)){
			fs.delete(out, true);
		}
		FileOutputFormat.setOutputPath(job, out);
		
		// Submit the job and wait for it to finish
		job.waitForCompletion(true);
	}

}
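
Once the job finishes, the load can be checked by reading a row back with the plain HBase client. A minimal sketch, assuming the same table and ZooKeeper quorum as above (the class name VerifyLoad is just a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class VerifyLoad {
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", "hadoop02:2181,hadoop03:2181,hadoop01:2181");
		try (Connection conn = ConnectionFactory.createConnection(conf);
				Table table = conn.getTable(TableName.valueOf("mingxing"))) {
			// Fetch the row whose key is the name from the first input line
			Result r = table.get(new Get(Bytes.toBytes("zhangfenglun")));
			String age = Bytes.toString(r.getValue(Bytes.toBytes("basicinfo"), Bytes.toBytes("age")));
			String phone = Bytes.toString(r.getValue(Bytes.toBytes("extrainfo"), Bytes.toBytes("phone")));
			System.out.println("age=" + age + ", phone=" + phone);
		}
	}
}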



Reposted from blog.csdn.net/YZY_001/article/details/82764995