Template 1
How to write the most basic MapReduce program
Workflow:
Load data from a local file, run it through MapReduce analysis, and store the results in HDFS
Example code
Mapper code
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class TestMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
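The map body above is left empty on purpose. As an illustration only, a word-count style implementation might look like the following sketch (the whitespace tokenization is an assumption, not part of the template):
// Hedged sketch of possible map logic (word count); the split rule is illustrative.
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String[] words = value.toString().split("\\s+"); // assumed whitespace-separated input
    for (String word : words) {
        context.write(new Text(word), new IntWritable(1)); // emit (word, 1)
    }
}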
Reducer code
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class TestReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
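Correspondingly, a hedged sketch of what the reduce body could do for word count is to sum the counts per key:
// Hedged sketch of possible reduce logic: sum all counts for one key.
int sum = 0;
for (IntWritable value : values) {
    sum += value.get();
}
context.write(key, new IntWritable(sum));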
Tool code
import com.cw.ct.analysis.mapper.TestMapper;
import com.cw.ct.analysis.reducer.TestReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
/**
 * Tool class for data analysis
 */
public class TestTool implements Tool {
private Configuration configuration = null;
public int run(String[] strings) throws Exception {
// Initialize the job
Job job = Job.getInstance();
job.setJarByClass(TestTool.class);
// Set which Mapper class to run
job.setMapperClass(TestMapper.class);
// Set which Reducer class to run
job.setReducerClass(TestReducer.class);
// Set the (key, value) output types of the map task
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// Set the (key, value) output types of the reduce task
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// Specify where the input data lives
FileInputFormat.setInputPaths(job, "hdfs://192.168.91.101:8020/wordcount/input/big.txt");
// Specify where to save the results
FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.91.101:8020/wordcount/output"));
boolean result = job.waitForCompletion(true);
if (result) {
return JobStatus.State.SUCCEEDED.getValue();
} else {
return JobStatus.State.FAILED.getValue();
}
}
public void setConf(Configuration configuration) {
this.configuration = configuration;
}
public Configuration getConf() {
return this.configuration;
}
}
Driver class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.util.ToolRunner;
public class Bootstrap {
public static void main(String[] args) throws Exception {
// Option 1:
int result = ToolRunner.run(new TestTool(), args);
// Option 2:
// new Configuration() automatically loads the configuration files present in the
// MapReduce runtime environment (core-site.xml, hbase-site.xml, ...)
// Passing a Configuration is optional: ToolRunner checks whether the tool's
// Configuration is null and creates one automatically when it is.
// int result = ToolRunner.run(new Configuration(), new TestTool(), args);
if (result == JobStatus.State.SUCCEEDED.getValue()) {
System.out.println("Job succeeded!");
System.exit(0);
} else {
System.out.println("Job failed!");
System.exit(1);
}
}
}
Template 2
Integrating HBase with MapReduce
Workflow
Read file data from HDFS and store it into HBase
Example code
Mapper code
public class TestMapper extends Mapper<LongWritable,Text,LongWritable, Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
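Because the parsing work in this template is done in the TableReducer below, the map stage can simply forward each line; a minimal sketch of that pass-through body:
// Hedged sketch: forward the (offset, line) pair unchanged to the reducer.
context.write(key, value);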
Reducer code
/**
 * Extends TableReducer so the reduce stage writes directly to HBase
 */
public class TestReducer extends TableReducer<LongWritable, Text, NullWritable> {
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
// Example code, for reference only
for (Text value : values) {
// Split each line into fields (assumed tab-separated)
String[] fields = value.toString().split("\t");
// Build the Put object, using the first field as the rowkey
Put put = new Put(Bytes.toBytes(fields[0]));
// Populate the Put object
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(fields[2]));
context.write(NullWritable.get(), put);
}
}
}
Tool code
public class TestTool implements Tool {
// Hold a Configuration instance
private Configuration configuration = null;
public int run(String[] args) throws Exception {
// 1. Get the Job instance
Job job = Job.getInstance(configuration);
// 2. Set the driver class
job.setJarByClass(TestTool.class);
// 3. Set the Mapper and its output KV types
job.setMapperClass(TestMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
// 4. Set the Reducer; args[1] is the target HBase table name
TableMapReduceUtil.initTableReducerJob(args[1], TestReducer.class, job);
// 5. Set the input path; args[0] is the HDFS source
FileInputFormat.setInputPaths(job, new Path(args[0]));
// 6. Submit the job
boolean result = job.waitForCompletion(true);
return result ? 0 : 1;
}
public void setConf(Configuration configuration) {
this.configuration=configuration;
}
public Configuration getConf() {
return configuration;
}
}
Driver class
public class Bootstrap {
public static void main(String[] args) throws Exception {
int run = ToolRunner.run(new TestTool(), args);
System.exit(run);
}
}
Template 3
Workflow
Read data from HBase, then store the results back into HBase
Example code
Mapper code
public class TestMapper extends TableMapper<ImmutableBytesWritable, Put> {
/**
 * Each call to map() processes a single row, i.e. one rowkey
 * @param key
 * @param value
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
// Example code below
// Build a Put object keyed by the same rowkey
Put put = new Put(key.get());
// Copy every cell of the row into the Put
for (Cell cell : value.rawCells()) {
put.add(cell);
}
// Write out
context.write(key, put);
}
}
Reducer code
public class TestReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
@Override
protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
for(Put put:values){
context.write(NullWritable.get(),put);
}
}
}
Tool code
public class TestTool implements Tool {
private Configuration configuration = null;
public int run(String[] args) throws Exception {
Job job = Job.getInstance(configuration);
job.setJarByClass(TestTool.class);
// Set the Mapper; "fruit1" is the source HBase table, and new Scan() reads the whole table
TableMapReduceUtil.initTableMapperJob("fruit1", new Scan(), TestMapper.class, ImmutableBytesWritable.class, Put.class, job);
// Set the Reducer; "fruit2" is the target HBase table
TableMapReduceUtil.initTableReducerJob("fruit2", TestReducer.class, job);
boolean result = job.waitForCompletion(true);
return result ? 0 : 1;
}
public void setConf(Configuration configuration) {
this.configuration=configuration;
}
public Configuration getConf() {
return configuration;
}
}
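One practical note: for full-table MapReduce scans it is common to tune the Scan object before handing it to initTableMapperJob. A hedged sketch of step 3 above (the caching value 500 is only an illustrative number):
Scan scan = new Scan();
scan.setCaching(500); // fetch more rows per RPC round trip; 500 is an example value
scan.setCacheBlocks(false); // avoid polluting the block cache during a full scan
TableMapReduceUtil.initTableMapperJob("fruit1", scan, TestMapper.class, ImmutableBytesWritable.class, Put.class, job);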
Driver class
public class Bootstrap{
public static void main(String[] args) throws Exception {
// HBaseConfiguration.create() loads hbase-default.xml and hbase-site.xml so the job can reach HBase
int run = ToolRunner.run(HBaseConfiguration.create(), new TestTool(), args);
System.exit(run);
}
}
Template 4
Integrating MapReduce with MySQL and HBase
Workflow
MapReduce reads data from HBase, analyzes it, and saves the results to MySQL
Example code
Mapper code
public class TestMapper extends TableMapper<Text, Text> {
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
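The map body is left open above. As one hedged possibility, it could read a single column from the Result and emit it as a (rowkey, value) pair; the "info" family and "name" qualifier below are assumptions for illustration:
// Hedged sketch: emit (rowkey, info:name) for each row; column names are assumed.
String rowKey = Bytes.toString(key.get());
byte[] name = value.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"));
if (name != null) {
    context.write(new Text(rowKey), new Text(Bytes.toString(name)));
}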
Reducer code
public class TestReducer extends Reducer<Text, Text,Text,Text> {
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
// Business logic...
}
}
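The reduce body is likewise left open. A hedged sketch that simply concatenates the values for a key, producing the (key, value) pairs that the custom MySQL output format below will persist (the comma separator is an arbitrary choice):
// Hedged sketch: join all values for one key; the separator is illustrative.
StringBuilder sb = new StringBuilder();
for (Text value : values) {
    if (sb.length() > 0) sb.append(",");
    sb.append(value.toString());
}
context.write(key, new Text(sb.toString()));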
Custom output format
/**
 * Custom OutputFormat that writes the results to MySQL
 */
public class MySqlTestOutputFormat extends OutputFormat<Text, Text> {
private FileOutputCommitter committer = null;
// Implemented as a static inner class
protected static class MySQLRecordWriter extends RecordWriter<Text, Text> {
public MySQLRecordWriter() {
}
/**
 * Write one record to the external store
 * @param text
 * @param text2
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void write(Text text, Text text2) throws IOException, InterruptedException {
// Store the record into MySQL here
}
/**
 * Release resources
 * @param taskAttemptContext
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
}
}
/**
 * Return the custom RecordWriter
 */
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
return new MySQLRecordWriter();
}
@Override
public void checkOutputSpecs(JobContext jobContext) throws IOException, InterruptedException {
}
// The methods below are boilerplate and can be copied as-is
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
if (this.committer == null) {
Path output = getOutputPath(taskAttemptContext);
this.committer = new FileOutputCommitter(output,taskAttemptContext);
}
return this.committer;
}
private static Path getOutputPath(JobContext job) {
String name = job.getConfiguration().get(FileOutputFormat.OUTDIR);
return name == null ? null: new Path(name);
}
}
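The write() method above is the place to push records into MySQL. Below is a hedged JDBC sketch of what it might contain; the JDBC URL, credentials, and the result(k, v) table are assumptions, not part of the template (requires java.sql.Connection, DriverManager, PreparedStatement, SQLException):
// Hypothetical JDBC-based write(); URL, credentials, and table name are assumed values.
private Connection connection = null;
@Override
public void write(Text text, Text text2) throws IOException, InterruptedException {
    try {
        if (connection == null) {
            connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/analysis", "root", "123456"); // assumed URL/credentials
        }
        PreparedStatement ps = connection.prepareStatement("INSERT INTO result (k, v) VALUES (?, ?)"); // assumed table/columns
        ps.setString(1, text.toString());
        ps.setString(2, text2.toString());
        ps.executeUpdate();
        ps.close();
    } catch (SQLException e) {
        throw new IOException(e);
    }
}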
Tool code
/**
 * Tool class for data analysis
 */
public class TestTool implements Tool {
private Configuration configuration = null;
public int run(String[] strings) throws Exception {
Job job = Job.getInstance();
job.setJarByClass(TestTool.class);
// Set the mapper; Names.TABLE.getValue() resolves the source HBase table name
TableMapReduceUtil.initTableMapperJob(
Names.TABLE.getValue(),
new Scan(),
TestMapper.class,
Text.class,
Text.class,
job
);
// Set the reducer
job.setReducerClass(TestReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// Set the custom output format
job.setOutputFormatClass(MySqlTestOutputFormat.class);
boolean result = job.waitForCompletion(true);
return result ? 0 : 1;
}
public void setConf(Configuration configuration) {
this.configuration = configuration;
}
public Configuration getConf() {
return this.configuration;
}
}
Driver class
public class Bootstrap{
public static void main(String[] args) throws Exception {
int run = ToolRunner.run(new TestTool(), args);
System.exit(run);
}
}
Custom data types
Sometimes the built-in MapReduce data types cannot express what we need, so we define our own custom data objects
Template
Custom Key type
/**
 * Custom key for data analysis
 * Writable: the base interface for Hadoop serializable data types
 * Comparable: the comparison interface for keys
 */
public class TestKey implements WritableComparable<TestKey> {
private String tel;
private String date;
public TestKey() {
}
public TestKey(String tel, String date) {
this.tel = tel;
this.date = date;
}
public String getTel() {
return tel;
}
public void setTel(String tel) {
this.tel = tel;
}
public String getDate() {
return date;
}
public void setDate(String date) {
this.date = date;
}
/**
 * Compare tel and date; keys that compare as equal are grouped into the same reduce call
 * @param key
 * @return
 */
public int compareTo(TestKey key) {
// Example code below
int result = tel.compareTo(key.getTel());
// If tel is equal, compare date
if (result == 0) {
result = date.compareTo(key.getDate());
}
// Returning 0 means the two keys are equal
return result;
}
/**
 * Serialize the fields; must be implemented
 * @param dataOutput
 * @throws IOException
 */
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(tel);
dataOutput.writeUTF(date);
}
/**
 * Deserialize the fields; must be implemented
 * @param dataInput
 * @throws IOException
 */
public void readFields(DataInput dataInput) throws IOException {
// The read order must match the write order
tel = dataInput.readUTF();
date = dataInput.readUTF();
}
}
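One practical addition: the default HashPartitioner routes keys to reduce tasks by hashCode(), so a custom key should normally override hashCode() and equals() consistently with compareTo(). A minimal sketch:
@Override
public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof TestKey)) return false;
    TestKey other = (TestKey) o;
    return tel.equals(other.tel) && date.equals(other.date);
}
@Override
public int hashCode() {
    // Combine both fields so equal keys land in the same partition
    return tel.hashCode() * 31 + date.hashCode();
}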
Custom Value type
/**
 * Custom value
 * Only Writable needs to be implemented, because values are not compared
 */
public class TestValue implements Writable {
private String sumCall;
private String sumDuration;
public TestValue() {
}
public TestValue(String sumCall, String sumDuration) {
this.sumCall = sumCall;
this.sumDuration = sumDuration;
}
public String getSumCall() {
return sumCall;
}
public void setSumCall(String sumCall) {
this.sumCall = sumCall;
}
public String getSumDuration() {
return sumDuration;
}
public void setSumDuration(String sumDuration) {
this.sumDuration = sumDuration;
}
/**
 * Serialize the fields; must be implemented
 * @param dataOutput
 * @throws IOException
 */
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(sumCall);
dataOutput.writeUTF(sumDuration);
}
/**
 * Deserialize the fields; must be implemented
 * @param dataInput
 * @throws IOException
 */
public void readFields(DataInput dataInput) throws IOException {
// The read order must match the write order
sumCall = dataInput.readUTF();
sumDuration = dataInput.readUTF();
}
}
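Wiring the custom types into a job works the same way as with the built-in types; a minimal sketch, assuming the mapper emits TestKey/TestValue pairs:
job.setMapOutputKeyClass(TestKey.class);
job.setMapOutputValueClass(TestValue.class);
// If the reducer also emits the custom types:
job.setOutputKeyClass(TestKey.class);
job.setOutputValueClass(TestValue.class);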