Submitting Hadoop Jobs Remotely from IDEA

  • Create a new Maven project and add the following dependencies:
<dependencies>
     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-common</artifactId>
         <version>2.7.1</version>
     </dependency>

     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-mapreduce-client-core</artifactId>
         <version>2.7.1</version>
     </dependency>

     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-hdfs</artifactId>
         <version>2.7.1</version>
     </dependency>

     <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
         <version>2.7.1</version>
     </dependency>
 </dependencies>
  • Write the Map class. It splits each "name score" line and emits a (name, score) pair:
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
      @Override
      protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
          String line = value.toString();
          System.out.println("行值:" + line);
          StringTokenizer tokenizer = new StringTokenizer(line, "\n");
          while (tokenizer.hasMoreTokens()) {
              StringTokenizer tokenizerLine = new StringTokenizer(tokenizer.nextToken());
              String strName = tokenizerLine.nextToken();
              String strScore = tokenizerLine.nextToken();
              Text name = new Text(strName);
              int score = Integer.parseInt(strScore);
              context.write(name, new IntWritable(score));
          }
      }
  }
  • Write the Reduce class. It averages all of a student's scores; note that the integer division discards any fractional part:
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
      @Override
      protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
          int sum = 0;
          int count = 0;
          Iterator<IntWritable> iterator = values.iterator();
          while (iterator.hasNext()) {
              sum += iterator.next().get();
              count++;
          }
          int average = sum / count;
          context.write(key, new IntWritable(average));
      }
  }
  • The main function (job driver):
System.setProperty("HADOOP_USER_NAME", "wujinlei");
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://master:9000");
conf.set("mapreduce.app-submission.cross-platform", "true");
conf.set("mapred.jar", "E:\\JackManWu\\hadoo-ptest\\target\\hadoop-test-1.0-SNAPSHOT.jar");
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
Job job = Job.getInstance(conf, "student_score");
job.setJarByClass(StudentScore.class); // the driver class inside the submitted jar

job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class); // caution: an averaging reducer is not a generally safe combiner
job.setReducerClass(Reduce.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/home/wujinlei/work/student/input"));
FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/home/wujinlei/work/student/output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
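
The snippets above omit the enclosing class and imports. For reference, here is a minimal sketch of how they might be assembled into one driver class (the class name StudentScore matches the setJarByClass call; the Map, Reduce, and main bodies are the ones shown above):

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class StudentScore {

    // paste the Map class from above here

    // paste the Reduce class from above here

    public static void main(String[] args) throws Exception {
        // paste the driver code from above here
    }
}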
  • Prepare the home/wujinlei/work/student/input input file on the cluster ahead of time, following the input-file creation steps from the first Hadoop program (WordCount); one way to upload it from code is sketched after the last step. (Note: home/wujinlei/work/student/output does not need to be created; the output directory is generated automatically.)
    • Sample input file:
    陈洲立 67
    陈东伟 98
    李宁 87
    杨森 86
    刘东奇 78
    谭果 94
    盖盖 83
    陈洲立 68
    陈东伟 96
    李宁 82
    杨森 85
    刘东奇 72
    谭果 97
    盖盖 82
    
  • Run the main function, follow the job's progress on the job page together with the Hadoop logs, and check the final output.
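
For the input-preparation step above, the file can also be uploaded directly from the IDE using the HDFS API that is already on the classpath (org.apache.hadoop.fs.FileSystem from hadoop-common). The following is a minimal sketch, assuming the sample scores above have been saved locally as student.txt (a hypothetical file name):

// Upload the local sample file into the job's HDFS input directory.
System.setProperty("HADOOP_USER_NAME", "wujinlei");
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://master:9000");
FileSystem fs = FileSystem.get(conf);
fs.mkdirs(new Path("/home/wujinlei/work/student/input"));
fs.copyFromLocalFile(new Path("student.txt"),
        new Path("/home/wujinlei/work/student/input/student.txt"));
fs.close();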

Reposted from my.oschina.net/u/3163032/blog/1622460