6.824——实验一Part I: Map/Reduce input and output

这一部分实验主要就是完成map原始文件(input)到中间文件(intermediateFile),然后reduce中间文件到输出文件(output)的功能,相当于是map函数和reduce函数的具体调用,分别生成中间文件和结果文件的阶段。

1.doMap

// doMap runs a single map task: it reads inFile, hands the contents to mapF,
// and partitions the returned key/value pairs across intermediate files, one
// per reduce task that received at least one key.
func doMap(
	jobName string, // the name of the MapReduce job
	mapTask int, // which map task this is
	inFile string, // path of the input file this map task reads
	nReduce int, // the number of reduce task that will be run ("R" in the paper)
	mapF func(filename string, contents string) []KeyValue, // user-defined map function
)

这个函数的主要功能是将某一个分块文件,通过用户定义的Map函数(mapF)生成键值对序列,并按 ihash(Key) mod nReduce 将键值对Partition到不同的分组中(0…nReduce-1),每个分组对应一个中间文件。
下面是完整的函数内容

	contents := ""
	f, _ := os.Open(inFile)
	inputScanner := bufio.NewScanner(f)
	//similar while boolean
	for inputScanner.Scan() {
		contents += inputScanner.Text() + " "
	}

	kvs := mapF(jobName, contents)
	file2kv := make(map[string][]KeyValue)
	for _, kv := range kvs {
		//partition KeyValues into diffrent file by key mod nReduce
		//then ReduceTask r only dispose the r'th intermediate file's KeyValues
		r := ihash(kv.Key) % nReduce
		fileName := reduceName(jobName, mapTask, r)
		file2kv[fileName] = append(file2kv[fileName], kv)
	}
	for file, kvs := range file2kv {
		f, _ := os.Create(file)
		enc := json.NewEncoder(f)
		for _, kv := range kvs {
			enc.Encode(&kv)
		}
		f.Close()
	}

2.doReduce

// doReduce runs a single reduce task: it gathers the key/value pairs written
// for partition reduceTask by each of the nMap map tasks, groups the values by
// key, calls reduceF once per key in sorted key order, and writes the results
// to outFile as JSON-encoded KeyValue records.
func doReduce(
	jobName string, // the name of the whole MapReduce job
	reduceTask int, // which reduce task this is
	outFile string, // write the output here
	nMap int, // the number of map tasks that were run ("M" in the paper)
	reduceF func(key string, values []string) string, // user-defined reduce function
)

这个函数的主要功能就是收集所有map任务的第reduceTask个中间文件并按key合并(merge),然后将key排序(sort),对每个key调用用户定义的reduceF函数进行Reduce,并把结果写入到output文件。
下面是完整的函数内容:

	//merge:merge KeyValue pair of the r'th intermediate file for all map task into key2v
	key2v := make(map[string][]string)
	for i := 0; i < nMap; i++ {
		//supposing reduceTask equals r,reduce task r collects the r'th intermediate file from each map task,
		//fileName is the r'th intermediate file's name
		fileName := reduceName(jobName, i, reduceTask)
		f, _ := os.Open(fileName)
		kvs := []KeyValue{}

		dec := json.NewDecoder(f)
		for {
			kv := KeyValue{}
			err := dec.Decode(&kv)
			if err != nil {
				fmt.Println("Decoder failed", err.Error())
				break
			}
			kvs = append(kvs, kv)
		}
		f.Close()
		for _, kv := range kvs {
			key2v[kv.Key] = append(key2v[kv.Key], kv.Value)
		}
	}
	//sort key2v by key
	keys := []string{}
	for k := range key2v {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	//write to outPutfile
	file, _ := os.Create(outFile)
	enc := json.NewEncoder(file)
	for _, k := range keys {
		enc.Encode(KeyValue{k, reduceF(k, key2v[k])})
	}
	file.Close()
发布了69 篇原创文章 · 获赞 10 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/JustKian/article/details/100915663