Hadoop Example: WordCount


Mapper class

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * org.apache.hadoop.mapreduce.Mapper&lt;KEYIN, VALUEIN, KEYOUT, VALUEOUT&gt;:
 * input key, input value, output key, output value
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // reuse the output objects instead of allocating new ones for every token
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // with the default TextInputFormat, each call receives a single line of input
        String line = value.toString();
        // StringTokenizer splits on whitespace (space, tab, newline) by default
        StringTokenizer st = new StringTokenizer(line);
        while (st.hasMoreTokens()) {
            word.set(st.nextToken());
            context.write(word, ONE);
        }
    }
}
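
To see exactly which tokens the mapper will emit, StringTokenizer can be exercised outside Hadoop. A minimal standalone sketch (the class name TokenizeDemo is ours, not from the original post):

import java.util.StringTokenizer;

public class TokenizeDemo {
    public static void main(String[] args) {
        // the default delimiter set is " \t\n\r\f", so newlines also separate tokens
        StringTokenizer st = new StringTokenizer("hello world\nhello hadoops");
        while (st.hasMoreTokens()) {
            System.out.println(st.nextToken());
        }
        // prints: hello, world, hello, hadoops (one per line)
    }
}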

Reducer class

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;

/**
 * org.apache.hadoop.mapreduce.Reducer&lt;KEYIN, VALUEIN, KEYOUT, VALUEOUT&gt;:
 * input key, input value, output key, output value.
 * The input types must match the Mapper's output types.
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // sum all the counts emitted for this word
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
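
To make the mapper/reducer contract concrete, here is the data flow for the three-line sample used later in the tests (hello world / hello hadoops / hello sunwei):

map output:     (hello,1) (world,1) (hello,1) (hadoops,1) (hello,1) (sunwei,1)
after shuffle:  hadoops -> [1]    hello -> [1,1,1]    sunwei -> [1]    world -> [1]
reduce output:  (hadoops,1) (hello,3) (sunwei,1) (world,1)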

Main entry point (driver)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class WordCount {
    public static void main(String[] args) {
        // create the configuration object
        Configuration conf = new Configuration();
        try {
            // create the job object (Configuration conf, String jobName)
            Job job = Job.getInstance(conf, "word count");
            // set the jar by finding where this class came from
            job.setJarByClass(WordCount.class);
            // set the mapper class
            job.setMapperClass(WordCountMapper.class);
            // set the reducer class
            job.setReducerClass(WordCountReducer.class);

            // set the map output key/value types
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            // set the reduce output key/value types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.setInputPaths(job, new Path("/test/input/friendship.txt"));
            FileOutputFormat.setOutputPath(job, new Path("/test/output"));

            // submit the job and wait for it to finish
            boolean b = job.waitForCompletion(true);
            if (!b) {
                System.out.println("wordcount task failed!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
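
Because word counting is plain summation, which is associative and commutative, the same reducer class can also serve as a combiner to pre-aggregate counts on the map side and shrink the shuffle. This line is an optional addition of ours, not part of the original driver; the call is the standard Job API:

// optional, in main() alongside the other job.set* calls:
job.setCombinerClass(WordCountReducer.class);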

Test class (MRUnit)

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;
import org.study.mr.count.WordCountMapper;
import org.study.mr.count.WordCountReducer;

import java.util.ArrayList;
import java.util.List;

/**
 * MRUnit tests for WordCountMapper and WordCountReducer.
 */
public class WordCountMRTest {
    public static final String inputTxt = "hello world\n" +
            "hello hadoops\n" +
            "hello sunwei\n";
    MapDriver<LongWritable, Text, Text, IntWritable> mapDriver;
    ReduceDriver<Text, IntWritable, Text, IntWritable> reduceDriver;

    MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, IntWritable> mapReduceDriver;

    @Before
    public void setUp() throws Exception {
        WordCountMapper mapper = new WordCountMapper();
        WordCountReducer reducer = new WordCountReducer();
        mapDriver = MapDriver.newMapDriver(mapper);
        reduceDriver = ReduceDriver.newReduceDriver(reducer);
        mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer);
    }

    @Test
    public void testMapper() {
        mapDriver.withInput(new LongWritable(), new Text("hello"));
        mapDriver.withOutput(new Text("hello"), new IntWritable(1));
        mapDriver.runTest();
    }

    @Test
    public void testReducer() {
        List<IntWritable> values = new ArrayList<IntWritable>();
        values.add(new IntWritable(1));
        values.add(new IntWritable(1));
        reduceDriver.withInput(new Text("hello"), values);
        reduceDriver.withOutput(new Text("hello"), new IntWritable(2));
        reduceDriver.runTest();
    }

    @Test
    public void testMapReducer() {
        mapReduceDriver.withInput(new LongWritable(), new Text("hello"));
        mapReduceDriver.withOutput(new Text("hello"), new IntWritable(1));
        mapReduceDriver.runTest();
    }
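
    // The inputTxt constant declared above is unused by the original tests.
    // A hypothetical extra test (our addition, not from the original post):
    // assuming MRUnit's shuffle sorts keys like the real framework does, the
    // expected outputs below are listed in alphabetical key order.
    @Test
    public void testMapReducerFullInput() {
        // StringTokenizer also splits on newlines, so the whole three-line
        // sample can be fed as a single input record
        mapReduceDriver.withInput(new LongWritable(), new Text(inputTxt));
        mapReduceDriver.withOutput(new Text("hadoops"), new IntWritable(1));
        mapReduceDriver.withOutput(new Text("hello"), new IntWritable(3));
        mapReduceDriver.withOutput(new Text("sunwei"), new IntWritable(1));
        mapReduceDriver.withOutput(new Text("world"), new IntWritable(1));
        mapReduceDriver.runTest();
    }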
}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>mr-study</groupId>
    <artifactId>mr-study</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <hadoopVersion>2.8.5</hadoopVersion>
    </properties>

    <dependencies>
        <!-- Hadoop start -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoopVersion}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoopVersion}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoopVersion}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoopVersion}</version>
        </dependency>
        <!-- Hadoop end -->

        <!-- MRUnit dependency -->
        <dependency>
            <groupId>org.apache.mrunit</groupId>
            <artifactId>mrunit</artifactId>
            <version>0.9.0-incubating</version>
            <classifier>hadoop2</classifier>
        </dependency>

        <!-- JUnit dependency -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <finalName>WordCount</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <!-- specify the main class and bundle all dependencies -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.4</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>org.study.mr.count.WordCount</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <!-- include configuration files -->
        <resources>
            <resource>
                <directory>src/main/java</directory>
                <includes>
                    <include>**/*.properties</include>
                </includes>
            </resource>
            <resource>
                <directory>src/main/resources</directory>
            </resource>
            <resource>
                <directory>resources</directory>
            </resource>
        </resources>
    </build>
</project>

Build and run

# build
mvn clean package

# submit the job to Hadoop (the main class is taken from the jar manifest configured in the pom)
hadoop jar target/WordCount-jar-with-dependencies.jar

Sample input

/test/input/friendship.txt

On Friendship
And a youth said, "Speak to us of Friendship."
Your friend is your needs answered.
He is your field which you sow with love and reap with thanksgiving.
And he is your board and your fireside.
For you come to him with your hunger, and you seek him for peace.
When your friend speaks his mind you fear not the "nay" in your own mind, nor do you withhold the "ay."
And when he is silent your heart ceases not to listen to his heart;
For without words, in friendship, all thoughts, all desires, all expectations are born and shared, with joy that is unacclaimed.
When you part from your friend, you grieve not;
For that which you love most in him may be clearer in his absence, as the mountain to the climber is clearer from the plain.
And let there be no purpose in friendship save the deepening of the spirit.
For love that seeks aught but the disclosure of its own mystery is not love but a net cast forth: and only the unprofitable is caught.
And let your best be for your friend.
If he must know the ebb of your tide, let him know its flood also.
For what is your friend that you should seek him with hours to kill?
Seek him always with hours to live.
For it is his to fill your need, but not your emptiness.
And in the sweetness of friendship let there be laughter, and sharing of pleasures.
For in the dew of little things the heart finds its morning and is refreshed.

Sample output

Friendship 5
... (remaining output omitted)

Notes

When the job is started locally, paths are resolved against the local filesystem, so remember to adjust the input and output paths accordingly (a sketch of how to parameterize them follows).
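
One way to avoid editing the source for each environment is to read the paths from the command line in the driver. A minimal sketch of the change (ours, not part of the original post):

// in WordCount.main, replace the two hardcoded paths with:
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

The job can then be submitted as, for example: hadoop jar target/WordCount-jar-with-dependencies.jar /test/input/friendship.txt /test/output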

Required HDFS commands

# create the input directory
hdfs dfs -mkdir -p /test/input

# upload the input file
hdfs dfs -put <local-path> /test/input

Delete the output directory if it already exists

# error: org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory hdfs://master:9000/test/output already exists
# delete the existing output directory
hdfs dfs -rm -r /test/output
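
Alternatively, the driver can remove a stale output directory itself before submitting the job. A sketch using the standard org.apache.hadoop.fs.FileSystem API (this code is not in the original post):

// in WordCount.main, after creating conf and before submitting the job
// (requires: import org.apache.hadoop.fs.FileSystem;)
Path output = new Path("/test/output");
FileSystem fs = FileSystem.get(conf);
if (fs.exists(output)) {
    fs.delete(output, true); // true = delete recursively
}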

Check that the input file exists

hdfs dfs -ls /test/input/

hdfs dfs -cat /test/input/friendship.txt

"Mkdirs failed to create" error when running Hadoop on macOS

Exception in thread "main" java.io.IOException: Mkdirs failed to create /var/folders/_p/2ws43j_d0rn32ptggdc089440000gn/T/hadoop-unjar7467945055400525315/META-INF/license
        at org.apache.hadoop.util.RunJar.ensureDirectory(RunJar.java:145)
        at org.apache.hadoop.util.RunJar.unJar(RunJar.java:110)
        at org.apache.hadoop.util.RunJar.unJar(RunJar.java:85)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:227)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:153)

# remove the conflicting LICENSE entries from the jar
zip -d WordCount-jar-with-dependencies.jar META-INF/LICENSE
zip -d WordCount-jar-with-dependencies.jar LICENSE
