parallelize (for an RDD created with parallelize, filter and map are executed one element at a time; this can be seen from the execution flow of the code below) [Pure Java code]

package com.bjsxt;


import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

public class PipeLineTest {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("test").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<String> aslist = Arrays.asList("zhangsan", "lisi", "wangwu");
        JavaRDD<String> parallelize = sc.parallelize(aslist);
        // filter is a lazy transformation: nothing runs until an action is called
        JavaRDD<String> filter = parallelize.filter(new Function<String, Boolean>() {

            @Override
            public Boolean call(String one) throws Exception {
                System.out.println("filter----------" + one);
                return true;
            }
        });
        // map is chained onto filter inside the same stage, so both run in one pipeline
        JavaRDD<String> map = filter.map(new Function<String, String>() {

            @Override
            public String call(String s) throws Exception {
                System.out.println("map*******************" + s);
                return s + "~";
            }
        });
        // collect is the action that triggers the pipelined execution above
        map.collect();
        sc.stop();
    }
}
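When this runs locally, the filter and map prints come out interleaved per element, rather than all filter prints followed by all map prints. That interleaving is what shows the two transformations being pipelined and applied one record at a time within the stage. The console output (Spark's own INFO logging omitted, exact lines may vary slightly) should look roughly like this:

filter----------zhangsan
map*******************zhangsan
filter----------lisi
map*******************lisi
filter----------wangwu
map*******************wangwu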
 

Reposted from blog.csdn.net/wyqwilliam/article/details/81154598