|
7 | 7 | import java.util.function.Function; |
8 | 8 | import java.util.stream.Collectors; |
9 | 9 |
|
10 | | -/* |
11 | | -* MapReduce is a programming model for processing and generating large data sets with a parallel, |
12 | | -distributed algorithm on a cluster. |
13 | | -* It has two main steps: the Map step, where the data is divided into smaller chunks and processed in parallel, |
14 | | -and the Reduce step, where the results from the Map step are combined to produce the final output. |
15 | | -* Wikipedia link : https://en.wikipedia.org/wiki/MapReduce |
16 | | -*/ |
17 | | - |
| 10 | +/** |
| 11 | + * MapReduce is a programming model for processing and generating large data sets |
| 12 | + * using a parallel, distributed algorithm on a cluster. |
| 13 | + * It consists of two main phases: |
| 14 | + * - Map: the input data is split into smaller chunks and processed in parallel. |
| 15 | + * - Reduce: the results from the Map phase are aggregated to produce the final output. |
| 16 | + * |
| 17 | + * See also: https://en.wikipedia.org/wiki/MapReduce |
| 18 | + */ |
18 | 19 | public final class MapReduce { |
| 20 | + |
19 | 21 | private MapReduce() { |
20 | 22 | } |
21 | | - /* |
22 | | - *Counting all the words frequency within a sentence. |
23 | | - */ |
24 | | - public static String mapreduce(String sentence) { |
25 | | - List<String> wordList = Arrays.stream(sentence.split(" ")).toList(); |
26 | 23 |
|
27 | | - // Map step |
28 | | - Map<String, Long> wordCounts = wordList.stream().collect(Collectors.groupingBy(Function.identity(), LinkedHashMap::new, Collectors.counting())); |
29 | | - |
30 | | - // Reduce step |
31 | | - StringBuilder result = new StringBuilder(); |
32 | | - wordCounts.forEach((word, count) -> result.append(word).append(": ").append(count).append(",")); |
| 24 | + /** |
| 25 | + * Counts the frequency of each word in a given sentence using a simple MapReduce-style approach. |
| 26 | + * |
| 27 | + * @param sentence the input sentence; must be non-null, with words separated by runs of whitespace
| 28 | + * @return the word frequencies in order of first occurrence, formatted as "word: count,word: count,..."
| 29 | + */ |
| 30 | + public static String countWordFrequencies(String sentence) { |
| 31 | + // Map phase: trim the sentence and split it into words on runs of whitespace
| 32 | + List<String> words = Arrays.asList(sentence.trim().split("\\s+")); |
33 | 33 |
|
34 | | - // Removing the last ',' if it exists |
35 | | - if (!result.isEmpty()) { |
36 | | - result.setLength(result.length() - 1); |
37 | | - } |
| 34 | + // Group identical words and count their occurrences; the LinkedHashMap preserves first-occurrence order
| 35 | + Map<String, Long> wordCounts = words.stream().collect(Collectors.groupingBy(Function.identity(), LinkedHashMap::new, Collectors.counting())); |
38 | 36 |
|
39 | | - return result.toString(); |
| 37 | + // Reduce phase: format the result |
| 38 | + return wordCounts.entrySet().stream().map(entry -> entry.getKey() + ": " + entry.getValue()).collect(Collectors.joining(",")); |
40 | 39 | } |
41 | 40 | } |
0 commit comments