Spark算子中combineByKey算子的理解(包含mapPartitionsWithIndex算子和parallelizePairs算子)【Java版纯代码】

package com.bjsxt;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;



/**
 * Demonstrates three Spark Java API operators on a small in-memory data set:
 * {@code parallelizePairs} (build a pair RDD with two partitions),
 * {@code mapPartitionsWithIndex} (show which partition each record is in) and
 * {@code combineByKey} (sum the values of each key).
 */
public class combineByKey {
	/**
	 * Runs the demo against a local Spark master and prints the results to stdout.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		SparkConf conf = new SparkConf().setAppName("test").setMaster("local");
		// JavaSparkContext is Closeable; try-with-resources stops the context
		// even when one of the jobs below throws.
		try (JavaSparkContext sc = new JavaSparkContext(conf)) {
			List<Tuple2<String, Integer>> list = Arrays.asList(
					new Tuple2<>("zhangsan", 10),
					new Tuple2<>("lisi", 11),
					new Tuple2<>("zhangsan", 12),

					new Tuple2<>("zhangsan", 13),
					new Tuple2<>("lisi", 14),
					new Tuple2<>("wangwu", 15),
					new Tuple2<>("lisi", 16)
					);
			// Distribute the pairs over 2 partitions.
			JavaPairRDD<String, Integer> rdd1 = sc.parallelizePairs(list, 2);

			// Tag every record with the index of the partition that holds it.
			JavaRDD<String> partitionView = rdd1.mapPartitionsWithIndex(
					new Function2<Integer, Iterator<Tuple2<String, Integer>>, Iterator<String>>() {

				@Override
				public Iterator<String> call(Integer index, Iterator<Tuple2<String, Integer>> iter) throws Exception {
					List<String> lines = new ArrayList<String>();
					while (iter.hasNext()) {
						Tuple2<String, Integer> next = iter.next();
						lines.add("index=" + index + ",value=" + next);
					}
					return lines.iterator();
				}
			}, false);
			// mapPartitionsWithIndex is a lazy transformation: without an action
			// such as collect() the function above would never run.
			for (String line : partitionView.collect()) {
				System.out.println(line);
			}

			JavaPairRDD<String, Integer> rdd2 = rdd1.combineByKey(new Function<Integer, Integer>() {

				// createCombiner: the first value seen for a key within a partition
				// becomes the initial accumulator for that key.
				@Override
				public Integer call(Integer i) throws Exception {
					return i;
				}
			}, new Function2<Integer, Integer, Integer>() {

				// mergeValue: fold a further value of the same key (same partition)
				// into the accumulator.
				@Override
				public Integer call(Integer v1, Integer v2) throws Exception {
					return v1 + v2;
				}
			}, new Function2<Integer, Integer, Integer>() {

				// mergeCombiners: merge the accumulators that different partitions
				// produced for the same key.
				@Override
				public Integer call(Integer s1, Integer s2) throws Exception {
					return s1 + s2;
				}
			});

			// foreach is an action, so this triggers the combineByKey job and
			// prints one (key, sum) tuple per key.
			rdd2.foreach(new VoidFunction<Tuple2<String, Integer>>() {

				@Override
				public void call(Tuple2<String, Integer> arg0) throws Exception {
					System.out.println(arg0);
				}
			});
		}
	}
}

猜你喜欢

转载自blog.csdn.net/wyqwilliam/article/details/81367412