Data:
zhangfenglun,M,20,13522334455,[email protected],23521472
chenfei,M,20,13684634455,[email protected],84545472
liyuchen,M,20,13522334255,[email protected],84765472
liuwei,M,20,13528734455,[email protected],84521765
liuyang,M,20,13522354455,[email protected],84231472
caofei,M,20,13735675455,[email protected],84527642
zhaoxinkuan,M,20,13522334466,[email protected],84512472
gaoying,M,20,13454523455,[email protected],845212344
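
The MapReduce job below loads this file (one name,sex,age,phone,email,qq record per line) from HDFS into an HBase table named mingxing: the mapper emits the name as the key and the remaining fields as the value, and a TableReducer packs sex and age into the basicinfo column family and phone, email, and qq into extrainfo. The driver creates the table through the Admin API if it does not already exist.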
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class hdfsToHbase {
    static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        Text mk = new Text();
        Text mv = new Text();
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // take one line of input and split it on commas
            // zhangfenglun,M,20,13522334455,[email protected],23521472
            // name,sex,age,phone,email,qq
            String[] split = value.toString().split(",");
            // the name becomes the map key; the rest of the line becomes the value
            mk.set(split[0]);
            mv.set(value.toString().substring(split[0].length() + 1));
            context.write(mk, mv);
        }
    }
    static class MyReducer extends TableReducer<Text, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values,
                Reducer<Text, Text, NullWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            // the row key is mandatory when constructing a Put
            Put p = new Put(key.toString().getBytes());
            // iterate over the values and pack them into the Put;
            // the name was consumed as the key, so each value holds: sex,age,phone,email,qq
            for (Text v : values) {
                String[] data = v.toString().split(",");
                // add column family, qualifier, and value to the Put
                p.addColumn("basicinfo".getBytes(), "sex".getBytes(), data[0].getBytes());
                p.addColumn("basicinfo".getBytes(), "age".getBytes(), data[1].getBytes());
                p.addColumn("extrainfo".getBytes(), "phone".getBytes(), data[2].getBytes());
                p.addColumn("extrainfo".getBytes(), "email".getBytes(), data[3].getBytes());
                p.addColumn("extrainfo".getBytes(), "qq".getBytes(), data[4].getBytes());
            }
            context.write(NullWritable.get(), p);
        }
    }
    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
        // run as the hadoop user
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        Configuration conf = new Configuration();
        // point the HBase client at the ZooKeeper quorum
        conf.set("hbase.zookeeper.quorum", "hadoop02:2181,hadoop03:2181,hadoop01:2181");
        // create the job
        Job job = Job.getInstance(conf);
        // set the jar by the driver class
        job.setJarByClass(hdfsToHbase.class);
        // set the mapper class
        job.setMapperClass(MyMapper.class);
        // the reducer is wired up by TableMapReduceUtil.initTableReducerJob() below, so this is not needed:
        // job.setReducerClass(MyReducer.class);
        // set the map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // connect to HBase
        Connection connect = ConnectionFactory.createConnection(conf);
        // get an Admin instance
        Admin admin = connect.getAdmin();
        // create the table if it does not exist yet
        String tablename = "mingxing";
        if (!admin.tableExists(TableName.valueOf(tablename))) { // tableExists() returns true if the table is already there
            HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tablename));
            HColumnDescriptor hcd1 = new HColumnDescriptor("basicinfo".getBytes());
            HColumnDescriptor hcd2 = new HColumnDescriptor("extrainfo".getBytes());
            htd.addFamily(hcd1);
            htd.addFamily(hcd2);
            admin.createTable(htd);
        }
        TableMapReduceUtil.initTableReducerJob(tablename, MyReducer.class, job, null, null, null, null, false);
        // input path
        FileInputFormat.addInputPath(job, new Path("hdfs://bd1804/mingxing.txt"));
        // output path; put core-site.xml and hdfs-site.xml from the cluster under src
        // so their settings are picked up. The output directory is not really needed
        // since the data goes to HBase: in my test it stayed empty, so this part could be dropped.
        FileSystem fs = FileSystem.get(conf);
        // delete the output directory if it already exists
        Path out = new Path("hdfs://bd1804/stuout01");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);
        // submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}
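
To spot-check the load, one row can be read back with the plain HBase client API. This is a minimal sketch, separate from the job above: the class name VerifyLoad is made up for illustration, and it assumes the same ZooKeeper quorum and table name used by the driver.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
public class VerifyLoad {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop02:2181,hadoop03:2181,hadoop01:2181");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("mingxing"))) {
            // fetch the row keyed by the name field of the input file
            Result r = table.get(new Get(Bytes.toBytes("zhangfenglun")));
            String age = Bytes.toString(r.getValue(Bytes.toBytes("basicinfo"), Bytes.toBytes("age")));
            String qq = Bytes.toString(r.getValue(Bytes.toBytes("extrainfo"), Bytes.toBytes("qq")));
            System.out.println("age=" + age + ", qq=" + qq); // expect age=20, qq=23521472
        }
    }
}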