Spark API Java编程使用方法
如何使用map
JavaDStream<String> lines = messages.map(s -> s.substring(0, 5))
- Implement the Function interfaces
// Anonymous-class form of the same map: keep the first five characters of every message.
JavaDStream<String> lines = messages.map(new Function<String, String>() {
    @Override
    public String call(String s) {
        return s.substring(0, 5);
    }
});
class GetLength implements Function<String, int> {
public Inter call(String s) { return s.length(); }
}
JavaDStream<String> lineLengths = lines.map(new GetLength())
如何使用reduce
int totalLength = lineLengths.reduce((a, b) -> a + b)
- Implement the Function interfaces
// Anonymous-class form. On a DStream, reduce returns another DStream holding the reduced
// value of each batch; it does not return a plain int.
JavaDStream<Integer> totalLength = lineLengths.reduce(new Function2<Integer, Integer, Integer>() {
    @Override
    public Integer call(Integer a, Integer b) {
        return a + b;
    }
});
class Sum implements Function2<Integer, Integer, Integer> {
public Integer call(Integer a, Integer b) { return a + b; }
}
// Named-class form. On a DStream, reduce returns another DStream holding the reduced
// value of each batch; it does not return a plain int.
JavaDStream<Integer> totalLength = lineLengths.reduce(new Sum());
如何使用reduceByKey
// Merge the values of each key by addition, using a method reference as the reducer.
JavaPairDStream<String, Integer> wordCounts = wordCount.reduceByKey(Integer::sum);
- Implement the Function interfaces
// Anonymous-class form: merge the values of each key by addition.
JavaPairDStream<String, Integer> wordCounts = wordCount.reduceByKey(
    new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer left, Integer right) {
            return Integer.sum(left, right);
        }
    });
如何使用flatMap
JavaDStream<String> words = lines.flatMap(x -> Lists.newArrayList(x.split(" ")))
- Implement the Function interfaces
// Anonymous-class form: split every line with the SPACE pattern and flatten the pieces into words.
JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
    @Override
    public Iterable<String> call(String x) {
        return Lists.newArrayList(SPACE.split(x));
    }
});
如何使用mapPartitions
// Lambda form: turn every line of a partition into a pair of its first two
// space-separated fields (treated as ip and domain).
JavaDStream<Tuple2<String, String>> mapLines = lines.mapPartitions(parts -> {
    List<Tuple2<String, String>> list = new ArrayList<>();
    while (parts.hasNext()) {
        // Split once and reuse the fields instead of re-splitting the line per field.
        String[] fields = parts.next().split(" ");
        // fields[1] throws ArrayIndexOutOfBoundsException for lines with fewer than two fields.
        String ip = fields[0];
        String domain = fields[1];
        list.add(new Tuple2<>(ip, domain));
    }
    return list;
});
- Implement the Function interfaces
// Anonymous-class form: turn every line of a partition into a pair of its first two
// space-separated fields (treated as ip and domain).
JavaDStream<Tuple2<String, String>> mapLines = lines.mapPartitions(
    new FlatMapFunction<Iterator<String>, Tuple2<String, String>>() {
        @Override
        public Iterable<Tuple2<String, String>> call(Iterator<String> s) {
            // The result list is local to each call. Held as a field of the function object,
            // it would keep growing if the same instance were invoked more than once.
            List<Tuple2<String, String>> list = new ArrayList<>();
            while (s.hasNext()) {
                // Split once and reuse the fields instead of re-splitting the line per field.
                String[] fields = s.next().split(" ");
                // fields[1] throws ArrayIndexOutOfBoundsException for lines with fewer than two fields.
                String ip = fields[0];
                String domain = fields[1];
                list.add(new Tuple2<>(ip, domain));
            }
            return list;
        }
    }
);
如何使用mapToPair
// Lambda form: pair every word with the count 1. The diamond avoids a raw Tuple2
// and the unchecked conversion that comes with it.
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1));
- Implement the Function interfaces
// Anonymous-class form: pair every word with the count 1.
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
    new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<>(s, 1);
        }
    });