Submitting Hadoop Jobs Remotely from IDEA

  • Create a new Maven project and add the following dependencies:
<dependencies>
     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-common</artifactId>
         <version>2.7.1</version>
     </dependency>

     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-mapreduce-client-core</artifactId>
         <version>2.7.1</version>
     </dependency>

     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-hdfs</artifactId>
         <version>2.7.1</version>
     </dependency>

     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
         <version>2.7.1</version>
     </dependency>
 </dependencies>
  • Write the Map class. It splits each "name score" line and emits a (name, score) pair:
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
      @Override
      protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
          String line = value.toString();
          System.out.println("行值:" + line);
          StringTokenizer tokenizer = new StringTokenizer(line, "\n");
          while (tokenizer.hasMoreTokens()) {
              StringTokenizer tokenizerLine = new StringTokenizer(tokenizer.nextToken());
              String strName = tokenizerLine.nextToken();
              String strScore = tokenizerLine.nextToken();
              Text name = new Text(strName);
              int score = Integer.parseInt(strScore);
              context.write(name, new IntWritable(score));
          }
      }
  }
  • Write the Reduce class. It averages all of a student's scores; note that the integer division discards any fractional part:
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
      @Override
      protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
          int sum = 0;
          int count = 0;
          Iterator<IntWritable> iterator = values.iterator();
          while (iterator.hasNext()) {
              sum += iterator.next().get();
              count++;
          }
          int average = sum / count;
          context.write(key, new IntWritable(average));
      }
  }
  • The main function (job driver):
System.setProperty("HADOOP_USER_NAME", "wujinlei");
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://master:9000");
conf.set("mapreduce.app-submission.cross-platform", "true");
conf.set("mapred.jar", "E:\\JackManWu\\hadoo-ptest\\target\\hadoop-test-1.0-SNAPSHOT.jar");
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
Job job = Job.getInstance(conf, "student_score");
job.setJarByClass(StudentScore.class); // the driver class inside the submitted jar

job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class); // caution: an averaging reducer is not a generally safe combiner
job.setReducerClass(Reduce.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/home/wujinlei/work/student/input"));
FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/home/wujinlei/work/student/output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
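
The snippets above omit the enclosing class and imports. For reference, here is a minimal sketch of how they might be assembled into one driver class (the class name StudentScore matches the setJarByClass call; the Map, Reduce, and main bodies are the ones shown above):

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class StudentScore {

    // paste the Map class from above here

    // paste the Reduce class from above here

    public static void main(String[] args) throws Exception {
        // paste the driver code from above here
    }
}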
  • Prepare the home/wujinlei/work/student/input input file on the cluster ahead of time, following the input-file creation steps from the first Hadoop program (WordCount); one way to upload it from code is sketched after the last step. (Note: home/wujinlei/work/student/output does not need to be created; the output directory is generated automatically.)
    • Sample input file:
    陈洲立 67
    陈东伟 98
    李宁 87
    杨森 86
    刘东奇 78
    谭果 94
    盖盖 83
    陈洲立 68
    陈东伟 96
    李宁 82
    杨森 85
    刘东奇 72
    谭果 97
    盖盖 82
    
  • Run the main function, follow the job's progress on the job page together with the Hadoop logs, and check the final output.
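
For the input-preparation step above, the file can also be uploaded directly from the IDE using the HDFS API that is already on the classpath (org.apache.hadoop.fs.FileSystem from hadoop-common). The following is a minimal sketch, assuming the sample scores above have been saved locally as student.txt (a hypothetical file name):

// Upload the local sample file into the job's HDFS input directory.
System.setProperty("HADOOP_USER_NAME", "wujinlei");
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://master:9000");
FileSystem fs = FileSystem.get(conf);
fs.mkdirs(new Path("/home/wujinlei/work/student/input"));
fs.copyFromLocalFile(new Path("student.txt"),
        new Path("/home/wujinlei/work/student/input/student.txt"));
fs.close();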

Reposted from my.oschina.net/u/3163032/blog/1622460