package com.bjsxt;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
public class AggregateByKeyDemo {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("test").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Tuple2<String, Integer>> list = Arrays.asList(
                new Tuple2<>("zhangsan", 10),
                new Tuple2<>("lisi", 11),
                new Tuple2<>("zhangsan", 12),
                new Tuple2<>("zhangsan", 13),
                new Tuple2<>("lisi", 14),
                new Tuple2<>("wangwu", 15),
                new Tuple2<>("lisi", 16)
        );
        JavaPairRDD<String, Integer> rdd1 = sc.parallelizePairs(list, 2);
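        // numSlices = 2 asks Spark to split the seven tuples into two partitions;
        // the debug dump below prints which tuples land in which partition.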
        // Debug dump: print each element together with its partition index.
        JavaRDD<String> debugRdd = rdd1.mapPartitionsWithIndex(new Function2<Integer, Iterator<Tuple2<String, Integer>>, Iterator<String>>() {
            @Override
            public Iterator<String> call(Integer index, Iterator<Tuple2<String, Integer>> iter) throws Exception {
                List<String> list2 = new ArrayList<String>();
                while (iter.hasNext()) {
                    Tuple2<String, Integer> next = iter.next();
                    System.out.println("index=" + index + ",value=" + next);
                }
                // Nothing is added to list2, so the returned iterator is empty;
                // this RDD exists only for its printing side effect.
                return list2.iterator();
            }
        }, false);
        // mapPartitionsWithIndex is a lazy transformation: without an action
        // such as collect(), the println above never executes.
        debugRdd.collect();
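        // With Spark's default slicing of a seven-element list into two
        // partitions (an assumption about parallelizePairs, not something the
        // source states), partition 0 should hold the first three tuples and
        // partition 1 the remaining four.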
        JavaPairRDD<String, String> rdd2 = rdd1.aggregateByKey("hello", new Function2<String, Integer, String>() {
            // seqOp: runs within each partition, folding every Integer value for
            // a key into a String accumulator that starts from the zero value "hello".
            @Override
            public String call(String v1, Integer v2) throws Exception {
                return v1 + "~" + v2;
            }
        }, new Function2<String, String, String>() {
            /**
             * combOp: what comes in here is of type String -- the per-partition
             * aggregates produced by the first function, merged across partitions.
             */
            @Override
            public String call(String s1, String s2) throws Exception {
                return s1 + "=>" + s2;
            }
        });
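        // How the two functions cooperate, assuming the partition split noted above:
        //   partition 0: zhangsan -> "hello~10~12",  lisi -> "hello~11"
        //   partition 1: zhangsan -> "hello~13",     lisi -> "hello~14~16",  wangwu -> "hello~15"
        // combOp then merges across partitions:
        //   zhangsan -> "hello~10~12=>hello~13",  lisi -> "hello~11=>hello~14~16"
        // wangwu appears in only one partition, so combOp never runs for it.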
        rdd2.foreach(new VoidFunction<Tuple2<String, String>>() {
            @Override
            public void call(Tuple2<String, String> arg0) throws Exception {
                System.out.println(arg0);
            }
        });
        sc.stop();
    }
}
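For reference, the same aggregation reads much more compactly with Java 8 lambdas. This is a minimal sketch under the assumption that the program runs on Spark 2.x with Java 8; the class name AggregateByKeyLambda is illustrative, not part of the original:

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class AggregateByKeyLambda {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("test").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaPairRDD<String, Integer> rdd1 = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>("zhangsan", 10),
                new Tuple2<>("lisi", 11),
                new Tuple2<>("zhangsan", 12),
                new Tuple2<>("zhangsan", 13)), 2);
        JavaPairRDD<String, String> rdd2 = rdd1.aggregateByKey(
                "hello",                      // zero value, applied once per key per partition
                (acc, v) -> acc + "~" + v,    // seqOp: fold each Integer into the running String
                (s1, s2) -> s1 + "=>" + s2);  // combOp: merge the per-partition Strings
        rdd2.foreach(t -> System.out.println(t));
        sc.stop();
    }
}

Because the zero value is folded in once per key per partition, a key that spans both partitions ends up with "hello" twice in its result, which the printed output of the original program makes visible.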
Understanding the aggregateByKey operator in Spark [pure Java code]
Reposted from blog.csdn.net/wyqwilliam/article/details/81355340