The "Hello world" of Hadoop, WordCount, is as follows:
package com.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * @program: hadoop_demo
 * @description: classic WordCount example
 * @author: Mr.Walloce
 * @create: 2018/11/03 15:04
 **/
public class WordCount extends Configured implements Tool {

    /**
     * <LongWritable, Text, Text, IntWritable>: input and output key-value types.
     */
    static class MyMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Output key: the current word
        private Text out_key = new Text();

        // Output value: each occurrence counts as 1
        private IntWritable out_value = new IntWritable(1);

        /**
         * @param key     byte offset of the current line within the input file
         * @param value   the current line of text
         * @param context
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            System.out.println("map phase starting...");
            // Split the line into words and emit (word, 1) for each
            String line = value.toString();
            String[] words = line.split(" ");
            for (String word : words) {
                out_key.set(word);
                context.write(out_key, out_value);
            }
            System.out.println("map phase finished.");
        }
    }

    /**
     * <Text, IntWritable, Text, IntWritable>: input and output key-value types.
     */
    static class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            System.out.println("reduce phase starting...");
            // Sum the counts for this word
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            System.out.println("word " + key.toString() + ": " + result.get());
            context.write(key, result);
            System.out.println("reduce phase finished.");
        }
    }

    // Combiner: same aggregation as the reducer, run map-side to cut shuffle traffic
    static class MyCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            System.out.println("combiner phase starting...");
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
            System.out.println("combiner phase finished.");
        }
    }

    public int run(String[] args) throws Exception {

        // Standard job boilerplate
        Configuration conf = this.getConf();
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());

        //************************ job-specific settings *************************
        // Required when running on a cluster (omitting it causes errors there);
        // optional for local runs, but best to always set it
        job.setJarByClass(WordCount.class);

        // Input and output paths
        Path in_path = new Path(args[0]);
        FileInputFormat.addInputPath(job, in_path);
        Path out_path = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, out_path);

        // The output path must not already exist, so delete it if it does
        FileSystem fs = out_path.getFileSystem(conf);
        if (fs.exists(out_path)) {
            fs.delete(out_path, true);
        }

        // Map settings
        job.setMapperClass(MyMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Shuffle settings
        job.setCombinerClass(MyCombiner.class);

        // Reduce settings
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Submit the job and wait for it to finish
        boolean isSuccess = job.waitForCompletion(true);

        // Return 0 on success, 1 on failure
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Hard-coded input and output paths (the random suffix keeps output paths unique)
        args = new String[]{
                "hdfs://walloce.one:8020/walloce/data/test.txt",
                "hdfs://walloce.one:8020/walloce/output/" + Math.random()
        };

        try {
            ToolRunner.run(conf, new WordCount(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
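For reference, here is what a successful run would produce on a small input (the file contents below are an assumed example, not taken from the original post). The map phase emits one (word, 1) pair per occurrence, the reduce phase sums the counts, and the default TextOutputFormat writes tab-separated lines sorted by key:

Input (test.txt):

hello world
hello hadoop

Output (part-r-00000):

hadoop	1
hello	2
world	1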
In this case, however, running the job failed with the following output:
18/11/22 15:06:00 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
18/11/22 15:06:02 INFO client.RMProxy: Connecting to ResourceManager at walloce.one/192.168.206.143:8032
18/11/22 15:06:03 WARN mapreduce.JobSubmitter: No job jar file set. User classes may not be found. See Job or Job#setJar(String).
18/11/22 15:06:03 INFO input.FileInputFormat: Total input paths to process : 1
18/11/22 15:06:03 INFO mapreduce.JobSubmitter: number of splits:1
18/11/22 15:06:04 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1542897380554_0004
18/11/22 15:06:04 INFO mapred.YARNRunner: Job jar is not present. Not adding any jar to the list of resources.
18/11/22 15:06:04 INFO impl.YarnClientImpl: Submitted application application_1542897380554_0004
18/11/22 15:06:04 INFO mapreduce.Job: The url to track the job: http://walloce.one:8088/proxy/application_1542897380554_0004/
18/11/22 15:06:04 INFO mapreduce.Job: Running job: job_1542897380554_0004
18/11/22 15:06:08 INFO mapreduce.Job: Job job_1542897380554_0004 running in uber mode : false
18/11/22 15:06:08 INFO mapreduce.Job: map 0% reduce 0%
18/11/22 15:06:08 INFO mapreduce.Job: Job job_1542897380554_0004 failed with state FAILED due to: Application application_1542897380554_0004 failed 2 times due to AM Container for appattempt_1542897380554_0004_000002 exited with exitCode: 1 due to: Exception from container-launch: ExitCodeException exitCode=1: /bin/bash: line 0: fg: no job control

ExitCodeException exitCode=1: /bin/bash: line 0: fg: no job control
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:538)
    at org.apache.hadoop.util.Shell.run(Shell.java:455)
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702)
    at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195)
    at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:300)
    at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:81)
    at java.util.concurrent.FutureTask.run(FutureTask.java:262)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)

Container exited with a non-zero exit code 1
.Failing this attempt.. Failing the application.
Check the configuration file yarn-site.xml (the mapreduce.framework.name property is conventionally placed in mapred-site.xml, but this setup keeps it in yarn-site.xml):
<!-- Run the MapReduce framework on YARN -->
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
The log warning "No job jar file set" shows the job was submitted straight from the IDE without a job jar, so the YARN containers cannot find the user classes. Since the job is really being run locally, the framework should be set to local instead:
<!-- Run the MapReduce framework locally -->
<property>
    <name>mapreduce.framework.name</name>
    <value>local</value>
</property>
After this change, the job runs successfully.
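For completeness: the "/bin/bash: line 0: fg: no job control" failure is a well-known symptom of submitting a job from a Windows IDE to a Linux YARN cluster. If you do want the job to run on YARN rather than locally, a commonly cited fix (a sketch, assuming Hadoop 2.4 or later; verify against your version) is to enable cross-platform submission:

<!-- Let a job submitted from a Windows client run on a Linux cluster -->
<property>
    <name>mapreduce.app-submission.cross-platform</name>
    <value>true</value>
</property>

combined with pointing the job at a built jar via Job#setJar(String), as the log warning itself suggests, or packaging the class into a jar and launching it on the cluster with the hadoop jar command.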
Postscript: a bittersweet learning journey!