 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.Optional;
@@ -43,7 +41,6 @@ public class WordCountingAppWithCheckpoint {
 
     public static JavaSparkContext sparkContext;
 
-    @SuppressWarnings("serial")
     public static void main(String[] args) throws InterruptedException {
 
         Logger.getLogger("org")
@@ -74,63 +71,30 @@ public static void main(String[] args) throws InterruptedException {
 
         JavaInputDStream<ConsumerRecord<String, String>> messages = KafkaUtils.createDirectStream(streamingContext, LocationStrategies.PreferConsistent(), ConsumerStrategies.<String, String> Subscribe(topics, kafkaParams));
 
-        JavaPairDStream<String, String> results = messages.mapToPair(new PairFunction<ConsumerRecord<String, String>, String, String>() {
-            @Override
-            public Tuple2<String, String> call(ConsumerRecord<String, String> record) {
-                return new Tuple2<>(record.key(), record.value());
-            }
-        });
+        JavaPairDStream<String, String> results = messages.mapToPair((PairFunction<ConsumerRecord<String, String>, String, String>) record -> new Tuple2<>(record.key(), record.value()));
 
-        JavaDStream<String> lines = results.map(new Function<Tuple2<String, String>, String>() {
-            @Override
-            public String call(Tuple2<String, String> tuple2) {
-                return tuple2._2();
-            }
-        });
+        JavaDStream<String> lines = results.map((Function<Tuple2<String, String>, String>) tuple2 -> tuple2._2());
 
-        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
-            @Override
-            public Iterator<String> call(String x) {
-                return Arrays.asList(x.split("\\s+"))
-                    .iterator();
-            }
-        });
+        JavaDStream<String> words = lines.flatMap((FlatMapFunction<String, String>) x -> Arrays.asList(x.split("\\s+"))
+            .iterator());
 
-        JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
-            @Override
-            public Tuple2<String, Integer> call(String s) {
-                return new Tuple2<>(s, 1);
-            }
-        })
-            .reduceByKey(new Function2<Integer, Integer, Integer>() {
-                @Override
-                public Integer call(Integer i1, Integer i2) {
-                    return i1 + i2;
-                }
-            });
-
-        Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = (word, one, state) -> {
+        JavaPairDStream<String, Integer> wordCounts = words.mapToPair((PairFunction<String, String, Integer>) s -> new Tuple2<>(s, 1))
+            .reduceByKey((Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2);
+
+        JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts = wordCounts.mapWithState(StateSpec.function((Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>) (word, one, state) -> {
             int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
             Tuple2<String, Integer> output = new Tuple2<>(word, sum);
             state.update(sum);
             return output;
-        };
-
-        JavaPairRDD<String, Integer> initialRDD = JavaPairRDD.fromJavaRDD(sparkContext.emptyRDD());
-
-        JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts = wordCounts.mapWithState(StateSpec.function(mappingFunc)
-            .initialState(initialRDD));
-
-        cumulativeWordCounts.foreachRDD(new VoidFunction<JavaRDD<Tuple2<String, Integer>>>() {
-            @Override
-            public void call(JavaRDD<Tuple2<String, Integer>> javaRdd) throws Exception {
-                List<Tuple2<String, Integer>> wordCountList = javaRdd.collect();
-                for (Tuple2<String, Integer> tuple : wordCountList) {
-                    List<Word> words = Arrays.asList(new Word(tuple._1, tuple._2));
-                    JavaRDD<Word> rdd = sparkContext.parallelize(words);
-                    javaFunctions(rdd).writerBuilder("vocabulary", "words", mapToRow(Word.class))
-                        .saveToCassandra();
-                }
+        }));
+
+        cumulativeWordCounts.foreachRDD((VoidFunction<JavaRDD<Tuple2<String, Integer>>>) javaRdd -> {
+            List<Tuple2<String, Integer>> wordCountList = javaRdd.collect();
+            for (Tuple2<String, Integer> tuple : wordCountList) {
+                List<Word> wordList = Arrays.asList(new Word(tuple._1, tuple._2));
+                JavaRDD<Word> rdd = sparkContext.parallelize(wordList);
+                javaFunctions(rdd).writerBuilder("vocabulary", "words", mapToRow(Word.class))
+                    .saveToCassandra();
+            }
             }
         });
 
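For context, a minimal standalone sketch of the conversion this diff applies at every call site (not part of the commit; the class name `LambdaConversionSketch` is hypothetical). Spark's Java function interfaces are single-abstract-method types, so an anonymous inner class and a lambda are interchangeable; dropping the anonymous classes is also what lets the commit drop `@SuppressWarnings("serial")`, since those interfaces extend `Serializable` and the anonymous-class form triggers the missing-`serialVersionUID` lint warning.

```java
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

public class LambdaConversionSketch {

    public static void main(String[] args) throws Exception {
        // Before: anonymous inner class implementing Spark's PairFunction.
        PairFunction<String, String, Integer> anonymous = new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) {
                return new Tuple2<>(s, 1);
            }
        };

        // After: the equivalent lambda. The commit writes the target-type cast
        // inline, e.g. (PairFunction<String, String, Integer>) s -> ...;
        // with a typed variable like this, the compiler infers it without the cast.
        PairFunction<String, String, Integer> lambda = s -> new Tuple2<>(s, 1);

        System.out.println(anonymous.call("spark")); // (spark,1)
        System.out.println(lambda.call("spark"));    // (spark,1)
    }
}
```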