Hadoop File System Command Line Basics


1. File system
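HDFS stores each file as a set of blocks spread across the cluster: the NameNode keeps the directory tree and block locations, while the DataNodes hold the block data. To a client it looks like an ordinary hierarchical file system addressed by paths such as /data/input. Besides the shell, the same operations are available programmatically through org.apache.hadoop.fs.FileSystem. Below is a minimal sketch of that API; the class name HdfsFileSystemDemo and the local file words.txt are placeholders, and the NameNode address matches the driver further down.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsFileSystemDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // same NameNode address as the driver below; change it for your cluster
        conf.set("fs.defaultFS", "hdfs://172.28.4.191:9000");

        try (FileSystem fs = FileSystem.get(conf)) {
            Path dir = new Path("/data/input");
            if (!fs.exists(dir)) {
                // like `hadoop fs -mkdir -p /data/input`
                fs.mkdirs(dir);
            }
            // upload a local file; like `hadoop fs -put words.txt /data/input`
            fs.copyFromLocalFile(new Path("words.txt"), dir);

            // list the directory; like `hadoop fs -ls /data/input`
            for (FileStatus status : fs.listStatus(dir)) {
                System.out.println(status.getPath() + "\t" + status.getLen());
            }
        }
    }
}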


2. Command line
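The HDFS shell is invoked as `hadoop fs <subcommand>` (or `hdfs dfs <subcommand>`); the everyday subcommands are -ls, -mkdir -p, -put, -get, -cat and -rm -r, and they map one-to-one onto the FileSystem calls shown above. The shell itself is implemented by org.apache.hadoop.fs.FsShell (part of hadoop-common), so the same commands can also be driven from Java. A small sketch, assuming the class name FsShellDemo and the paths below are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.util.ToolRunner;

public class FsShellDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.28.4.191:9000");

        // each call behaves like typing `hadoop fs <args>` on the command line
        ToolRunner.run(conf, new FsShell(), new String[] {"-mkdir", "-p", "/data/input"});
        ToolRunner.run(conf, new FsShell(), new String[] {"-ls", "/data"});
        ToolRunner.run(conf, new FsShell(), new String[] {"-cat", "/data/input/words.txt"});
    }
}

The rest of this post is a MapReduce word-count driver that uses the same FileSystem API to check its input directory and clean up its output directory before submitting the job.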

/**
 * License to Sheep Core
 */

package com.sheepcore.wordcount.mapreduce;


import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Driver class: configures the job and sets up the runtime environment
 * for the MapReduce word-count program.
 */
public class MRRunJob {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // point fs.defaultFS at the NameNode of the cluster
        conf.set("fs.defaultFS", "hdfs://172.28.4.191:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // initialize the HDFS client; main() declares IOException, so let failures propagate
        FileSystem fs = FileSystem.get(conf);
        // sanity check: print whether the input directory exists
        System.out.println(fs.exists(new Path("/data/input")));
        // create a new job to run the MapReduce program
        Job job = Job.getInstance(conf, "My first mapreduce program!");
        // the class used to locate the job jar
        job.setJarByClass(MRRunJob.class);
        // set the mapper class
        job.setMapperClass(WordCountMapper.class);
        // set the reducer class
        job.setReducerClass(WordCountReducer.class);
        // output key type (used for both map and reduce output here)
        job.setOutputKeyClass(Text.class);
        // output value type
        job.setOutputValueClass(IntWritable.class);

        // input path
        Path inPath = new Path("/data/input");

        // output path; it must not already exist when the job starts
        Path outPath = new Path("/data/output");

        try {
            // add the input path to the job
            FileInputFormat.addInputPath(job, inPath);
            // delete the output path if it is left over from a previous run,
            // then register it with the job
            if (fs.exists(outPath)) {
                fs.delete(outPath, true);
            }
            FileOutputFormat.setOutputPath(job, outPath);
            // submit the job and block until it finishes
            boolean finish = job.waitForCompletion(true);
            System.exit(finish ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

}
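The driver above refers to WordCountMapper and WordCountReducer, which are not shown in this post. A minimal sketch of what they might look like, following the classic WordCount pattern (two separate files in the same package):

// WordCountMapper.java
package com.sheepcore.wordcount.mapreduce;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // split each input line into tokens and emit (word, 1) for every token
        StringTokenizer tokenizer = new StringTokenizer(value.toString());
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            context.write(word, ONE);
        }
    }
}

// WordCountReducer.java
package com.sheepcore.wordcount.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // sum the counts emitted for this word by all mappers
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}

Package the three classes into a jar (the name wordcount.jar below is a placeholder) and submit it with `hadoop jar wordcount.jar com.sheepcore.wordcount.mapreduce.MRRunJob`; the word counts end up in /data/output/part-r-00000.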

Reposted from www.cnblogs.com/sheepcore/p/11211711.html