Template 1
How to write the most basic MapReduce program
Workflow:
Load data from a local file, run it through MapReduce analysis, and store the results in HDFS
Example code
Mapper code
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class TestMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
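The map body above is left empty on purpose. As an illustration only, a word-count style implementation might look like the following sketch (the whitespace tokenization is an assumption, not part of the template):
// Hedged sketch of possible map logic (word count); the split rule is illustrative.
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String[] words = value.toString().split("\\s+"); // assumed whitespace-separated input
    for (String word : words) {
        context.write(new Text(word), new IntWritable(1)); // emit (word, 1)
    }
}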
Reducer code
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class TestReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
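Correspondingly, a hedged sketch of what the reduce body could do for word count is to sum the counts per key:
// Hedged sketch of possible reduce logic: sum all counts for one key.
int sum = 0;
for (IntWritable value : values) {
    sum += value.get();
}
context.write(key, new IntWritable(sum));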
Tool code
import com.cw.ct.analysis.mapper.TestMapper;
import com.cw.ct.analysis.reducer.TestReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
/**
 * Tool class for data analysis
 */
public class TestTool implements Tool {
private Configuration configuration = null;
public int run(String[] strings) throws Exception {
// Initialize the job
Job job = Job.getInstance();
job.setJarByClass(TestTool.class);
// Set which Mapper class to run
job.setMapperClass(TestMapper.class);
// Set which Reducer class to run
job.setReducerClass(TestReducer.class);
// Set the (key, value) output types of the map task
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// Set the (key, value) output types of the reduce task
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// Specify where the input data lives
FileInputFormat.setInputPaths(job, "hdfs://192.168.91.101:8020/wordcount/input/big.txt");
// Specify where to save the results
FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.91.101:8020/wordcount/output"));
boolean result = job.waitForCompletion(true);
if (result) {
return JobStatus.State.SUCCEEDED.getValue();
} else {
return JobStatus.State.FAILED.getValue();
}
}
public void setConf(Configuration configuration) {
this.configuration = configuration;
}
public Configuration getConf() {
return this.configuration;
}
}
Driver class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.util.ToolRunner;
public class Bootstrap {
public static void main(String[] args) throws Exception {
// Option 1:
int result = ToolRunner.run(new TestTool(), args);
// Option 2:
// new Configuration() automatically loads the configuration files present in the
// MapReduce runtime environment (core-site.xml, hbase-site.xml, ...)
// Passing a Configuration is optional: ToolRunner checks whether the tool's
// Configuration is null and creates one automatically when it is.
// int result = ToolRunner.run(new Configuration(), new TestTool(), args);
if (result == JobStatus.State.SUCCEEDED.getValue()) {
System.out.println("Job succeeded!");
System.exit(0);
} else {
System.out.println("Job failed!");
System.exit(1);
}
}
}
Template 2
Integrating HBase with MapReduce
Workflow
Read file data from HDFS and store it into HBase
Example code
Mapper code
public class TestMapper extends Mapper<LongWritable,Text,LongWritable, Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
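Because the parsing work in this template is done in the TableReducer below, the map stage can simply forward each line; a minimal sketch of that pass-through body:
// Hedged sketch: forward the (offset, line) pair unchanged to the reducer.
context.write(key, value);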
Reducer code
/**
 * Extends TableReducer so the reduce stage writes directly to HBase
 */
public class TestReducer extends TableReducer<LongWritable, Text, NullWritable> {
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
// Example code, for reference only
for (Text value : values) {
// Split each line into fields (assumed tab-separated)
String[] fields = value.toString().split("\t");
// Build the Put object, using the first field as the rowkey
Put put = new Put(Bytes.toBytes(fields[0]));
// Populate the Put object
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(fields[2]));
context.write(NullWritable.get(), put);
}
}
}
Tool code
public class TestTool implements Tool {
// Hold a Configuration instance
private Configuration configuration = null;
public int run(String[] args) throws Exception {
// 1. Get the Job instance
Job job = Job.getInstance(configuration);
// 2. Set the driver class
job.setJarByClass(TestTool.class);
// 3. Set the Mapper and its output KV types
job.setMapperClass(TestMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
// 4. Set the Reducer; args[1] is the target HBase table name
TableMapReduceUtil.initTableReducerJob(args[1], TestReducer.class, job);
// 5. Set the input path; args[0] is the HDFS source
FileInputFormat.setInputPaths(job, new Path(args[0]));
// 6. Submit the job
boolean result = job.waitForCompletion(true);
return result ? 0 : 1;
}
public void setConf(Configuration configuration) {
this.configuration=configuration;
}
public Configuration getConf() {
return configuration;
}
}
Driver class
public class Bootstrap {
public static void main(String[] args) throws Exception {
int run = ToolRunner.run(new TestTool(), args);
System.exit(run);
}
}
Template 3
Workflow
Read data from HBase, then store the results back into HBase
Example code
Mapper code
public class TestMapper extends TableMapper<ImmutableBytesWritable, Put> {
/**
 * Each call to map() processes a single row, i.e. one rowkey
 * @param key
 * @param value
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
// Example code below
// Build a Put object keyed by the same rowkey
Put put = new Put(key.get());
// Copy every cell of the row into the Put
for (Cell cell : value.rawCells()) {
put.add(cell);
}
// Write out
context.write(key, put);
}
}
Reducer code
public class TestReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
@Override
protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
for(Put put:values){
context.write(NullWritable.get(),put);
}
}
}
Tool code
public class TestTool implements Tool {
private Configuration configuration = null;
public int run(String[] args) throws Exception {
Job job = Job.getInstance(configuration);
job.setJarByClass(TestTool.class);
// Set the Mapper; "fruit1" is the source HBase table, and new Scan() reads the whole table
TableMapReduceUtil.initTableMapperJob("fruit1", new Scan(), TestMapper.class, ImmutableBytesWritable.class, Put.class, job);
// Set the Reducer; "fruit2" is the target HBase table
TableMapReduceUtil.initTableReducerJob("fruit2", TestReducer.class, job);
boolean result = job.waitForCompletion(true);
return result ? 0 : 1;
}
public void setConf(Configuration configuration) {
this.configuration=configuration;
}
public Configuration getConf() {
return configuration;
}
}
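One practical note: for full-table MapReduce scans it is common to tune the Scan object before handing it to initTableMapperJob. A hedged sketch of step 3 above (the caching value 500 is only an illustrative number):
Scan scan = new Scan();
scan.setCaching(500); // fetch more rows per RPC round trip; 500 is an example value
scan.setCacheBlocks(false); // avoid polluting the block cache during a full scan
TableMapReduceUtil.initTableMapperJob("fruit1", scan, TestMapper.class, ImmutableBytesWritable.class, Put.class, job);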
Driver class
public class Bootstrap{
public static void main(String[] args) throws Exception {
// HBaseConfiguration.create() loads hbase-default.xml and hbase-site.xml so the job can reach HBase
int run = ToolRunner.run(HBaseConfiguration.create(), new TestTool(), args);
System.exit(run);
}
}
Template 4
Integrating MapReduce with MySQL and HBase
Workflow
MapReduce reads data from HBase, analyzes it, and saves the results to MySQL
Example code
Mapper code
public class TestMapper extends TableMapper<Text, Text> {
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
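The map body is left open above. As one hedged possibility, it could read a single column from the Result and emit it as a (rowkey, value) pair; the "info" family and "name" qualifier below are assumptions for illustration:
// Hedged sketch: emit (rowkey, info:name) for each row; column names are assumed.
String rowKey = Bytes.toString(key.get());
byte[] name = value.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"));
if (name != null) {
    context.write(new Text(rowKey), new Text(Bytes.toString(name)));
}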
Reducer code
public class TestReducer extends Reducer<Text, Text,Text,Text> {
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
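The reduce body is likewise left open. A hedged sketch that simply concatenates the values for a key, producing the (key, value) pairs that the custom MySQL output format below will persist (the comma separator is an arbitrary choice):
// Hedged sketch: join all values for one key; the separator is illustrative.
StringBuilder sb = new StringBuilder();
for (Text value : values) {
    if (sb.length() > 0) sb.append(",");
    sb.append(value.toString());
}
context.write(key, new Text(sb.toString()));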
Custom output format
/**
 * Custom OutputFormat that writes the results to MySQL
 */
public class MySqlTestOutputFormat extends OutputFormat<Text, Text> {
private FileOutputCommitter committer = null;
// Implemented as a static inner class
protected static class MySQLRecordWriter extends RecordWriter<Text, Text> {
public MySQLRecordWriter() {
}
/**
 * Write one record to the external store
 * @param text
 * @param text2
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void write(Text text, Text text2) throws IOException, InterruptedException {
// Store the record into MySQL here
}
/**
 * Release resources
 * @param taskAttemptContext
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
}
}
/**
 * Return the custom RecordWriter
 */
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
return new MySQLRecordWriter();
}
@Override
public void checkOutputSpecs(JobContext jobContext) throws IOException, InterruptedException {
}
// The methods below are boilerplate and can be copied as-is
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
if (this.committer == null) {
Path output = getOutputPath(taskAttemptContext);
this.committer = new FileOutputCommitter(output,taskAttemptContext);
}
return this.committer;
}
private static Path getOutputPath(JobContext job) {
String name = job.getConfiguration().get(FileOutputFormat.OUTDIR);
return name == null ? null: new Path(name);
}
}
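The write() method above is the place to push records into MySQL. Below is a hedged JDBC sketch of what it might contain; the JDBC URL, credentials, and the result(k, v) table are assumptions, not part of the template (requires java.sql.Connection, DriverManager, PreparedStatement, SQLException):
// Hypothetical JDBC-based write(); URL, credentials, and table name are assumed values.
private Connection connection = null;
@Override
public void write(Text text, Text text2) throws IOException, InterruptedException {
    try {
        if (connection == null) {
            connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/analysis", "root", "123456"); // assumed URL/credentials
        }
        PreparedStatement ps = connection.prepareStatement("INSERT INTO result (k, v) VALUES (?, ?)"); // assumed table/columns
        ps.setString(1, text.toString());
        ps.setString(2, text2.toString());
        ps.executeUpdate();
        ps.close();
    } catch (SQLException e) {
        throw new IOException(e);
    }
}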
Tool code
/**
 * Tool class for data analysis
 */
public class TestTool implements Tool {
private Configuration configuration = null;
public int run(String[] strings) throws Exception {
Job job = Job.getInstance();
job.setJarByClass(TestTool.class);
// Set the mapper; Names.TABLE.getValue() resolves the source HBase table name
TableMapReduceUtil.initTableMapperJob(
Names.TABLE.getValue(),
new Scan(),
TestMapper.class,
Text.class,
Text.class,
job
);
// Set the reducer
job.setReducerClass(TestReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// Set the custom output format
job.setOutputFormatClass(MySqlTestOutputFormat.class);
boolean result = job.waitForCompletion(true);
return result ? 0 : 1;
}
public void setConf(Configuration configuration) {
this.configuration = configuration;
}
public Configuration getConf() {
return this.configuration;
}
}
Driver class
public class Bootstrap{
public static void main(String[] args) throws Exception {
int run = ToolRunner.run(new TestTool(), args);
System.exit(run);
}
}
Custom data types
Sometimes the built-in MapReduce data types cannot express what we need, so we define our own custom data objects
Template
Custom Key type
/**
 * Custom key for data analysis
 * Writable: the base interface for Hadoop serializable data types
 * Comparable: the comparison interface for keys
 */
public class TestKey implements WritableComparable<TestKey> {
private String tel;
private String date;
public TestKey() {
}
public TestKey(String tel, String date) {
this.tel = tel;
this.date = date;
}
public String getTel() {
return tel;
}
public void setTel(String tel) {
this.tel = tel;
}
public String getDate() {
return date;
}
public void setDate(String date) {
this.date = date;
}
/**
 * Compare tel and date; keys that compare as equal are grouped into the same reduce call
 * @param key
 * @return
 */
public int compareTo(TestKey key) {
// Example code below
int result = tel.compareTo(key.getTel());
// If tel is equal, compare date
if (result == 0) {
result = date.compareTo(key.getDate());
}
// Returning 0 means the two keys are equal
return result;
}
/**
 * Serialize the fields; must be implemented
 * @param dataOutput
 * @throws IOException
 */
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(tel);
dataOutput.writeUTF(date);
}
/**
 * Deserialize the fields; must be implemented
 * @param dataInput
 * @throws IOException
 */
public void readFields(DataInput dataInput) throws IOException {
// The read order must match the write order
tel = dataInput.readUTF();
date = dataInput.readUTF();
}
}
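One practical addition: the default HashPartitioner routes keys to reduce tasks by hashCode(), so a custom key should normally override hashCode() and equals() consistently with compareTo(). A minimal sketch:
@Override
public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof TestKey)) return false;
    TestKey other = (TestKey) o;
    return tel.equals(other.tel) && date.equals(other.date);
}
@Override
public int hashCode() {
    // Combine both fields so equal keys land in the same partition
    return tel.hashCode() * 31 + date.hashCode();
}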
Custom Value type
/**
 * Custom value
 * Only Writable needs to be implemented, because values are not compared
 */
public class TestValue implements Writable {
private String sumCall;
private String sumDuration;
public TestValue() {
}
public TestValue(String sumCall, String sumDuration) {
this.sumCall = sumCall;
this.sumDuration = sumDuration;
}
public String getSumCall() {
return sumCall;
}
public void setSumCall(String sumCall) {
this.sumCall = sumCall;
}
public String getSumDuration() {
return sumDuration;
}
public void setSumDuration(String sumDuration) {
this.sumDuration = sumDuration;
}
/**
 * Serialize the fields; must be implemented
 * @param dataOutput
 * @throws IOException
 */
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(sumCall);
dataOutput.writeUTF(sumDuration);
}
/**
 * Deserialize the fields; must be implemented
 * @param dataInput
 * @throws IOException
 */
public void readFields(DataInput dataInput) throws IOException {
// The read order must match the write order
sumCall = dataInput.readUTF();
sumDuration = dataInput.readUTF();
}
}
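Wiring the custom types into a job works the same way as with the built-in types; a minimal sketch, assuming the mapper emits TestKey/TestValue pairs:
job.setMapOutputKeyClass(TestKey.class);
job.setMapOutputValueClass(TestValue.class);
// If the reducer also emits the custom types:
job.setOutputKeyClass(TestKey.class);
job.setOutputValueClass(TestValue.class);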