/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package solutions;
import java.io.BufferedReader;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.IntSummaryStatistics;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
* This set of exercises covers advanced stream operations,
* including reduction, grouping collectors, and composition
* of collectors.
*/
public class F_AdvancedStreams {
/**
* Compute the value of 21!, that is, 21 factorial. This value is larger than
* Long.MAX_VALUE, so you must use BigInteger.
*/
@Test
public void ex19_bigFactorial() {
//TODO//BigInteger result = BigInteger.ONE;
//BEGINREMOVE
BigInteger result =
IntStream.rangeClosed(1, 21)
.mapToObj(n -> BigInteger.valueOf(n))
.reduce(BigInteger.ONE, (m, n) -> m.multiply(n));
//ENDREMOVE
assertEquals(new BigInteger("51090942171709440000"), result);
}
// Hint:
// <editor-fold defaultstate="collapsed">
// Use LongStream and reduction.
// </editor-fold>
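// Illustrative sketch (not part of the original exercise): the hint's LongStream
// variant of the same reduction. The multiplication must still be done with
// BigInteger, because 21! does not fit in a long. (Requires importing
// java.util.stream.LongStream.)
//
//   BigInteger result =
//       LongStream.rangeClosed(1, 21)
//                 .mapToObj(BigInteger::valueOf)
//                 .reduce(BigInteger.ONE, BigInteger::multiply);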
/**
* Get the last word in the text file.
*
* @throws IOException
*/
@Test
public void ex20_getLastWord() throws IOException {
//TODO//String result = null;
//BEGINREMOVE
String result =
reader.lines()
.flatMap(line -> WORD_PATTERN.splitAsStream(line))
.reduce((a, b) -> b)
.orElse("");
//ENDREMOVE
assertEquals("thee", result);
}
// Hint:
// <editor-fold defaultstate="collapsed">
// Use Stream.reduce().
// </editor-fold>
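// Illustrative sketch (not part of the original exercise): reduce((a, b) -> b)
// discards the partial result and keeps the element it saw most recently, so the
// final value is the last element in encounter order. On a simple stream:
//
//   Optional<String> last = Stream.of("x", "y", "z").reduce((a, b) -> b);
//   // last.get() is "z"
//
// Because the source stream might be empty, this overload of reduce() returns an
// Optional, which the solution above unwraps with orElse("").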
/**
* Categorize the words from the text file into a map, where the map's key
* is the length of each word, and the value corresponding to a key is a
* list of words of that length. Don't bother with uniqueness or lower-
* casing the words.
*
* @throws IOException
*/
@Test
public void ex21_mapLengthToWordList() throws IOException {
//TODO//Map<Integer, List<String>> result = null;
//BEGINREMOVE
Map<Integer, List<String>> result =
reader.lines()
.flatMap(line -> WORD_PATTERN.splitAsStream(line))
.collect(Collectors.groupingBy(String::length));
//ENDREMOVE
assertEquals(10, result.get(7).size());
assertEquals(new HashSet<>(Arrays.asList("beauty's", "increase", "ornament")), new HashSet<>(result.get(8)));
assertEquals(new HashSet<>(Arrays.asList("abundance", "creatures")), new HashSet<>(result.get(9)));
assertEquals(new HashSet<>(Arrays.asList("contracted", "niggarding")), new HashSet<>(result.get(10)));
assertEquals(Arrays.asList("substantial"), result.get(11));
assertFalse(result.containsKey(12));
}
// Hint:
// <editor-fold defaultstate="collapsed">
// Use Collectors.groupingBy().
// </editor-fold>
/**
* Categorize the words from the text file into a map, where the map's key
* is the length of each word, and the value corresponding to a key is a
* count of words of that length. Don't bother with uniqueness or lower-
* casing the words. This is the same as the previous exercise except
* the map values are the count of words instead of a list of words.
*
* @throws IOException
*/
@Test
public void ex22_mapLengthToWordCount() throws IOException {
//TODO//Map<Integer, Long> result = null;
//BEGINREMOVE
Map<Integer, Long> result =
reader.lines()
.flatMap(line -> WORD_PATTERN.splitAsStream(line))
.collect(Collectors.groupingBy(String::length, Collectors.counting()));
//ENDREMOVE
assertEquals( 1L, (long)result.get(1));
assertEquals(11L, (long)result.get(2));
assertEquals(28L, (long)result.get(3));
assertEquals(21L, (long)result.get(4));
assertEquals(16L, (long)result.get(5));
assertEquals(12L, (long)result.get(6));
assertEquals(10L, (long)result.get(7));
assertEquals( 3L, (long)result.get(8));
assertEquals( 2L, (long)result.get(9));
assertEquals( 2L, (long)result.get(10));
assertEquals( 1L, (long)result.get(11));
IntSummaryStatistics stats = result.keySet().stream().mapToInt(i -> i).summaryStatistics();
assertEquals("min key", 1, stats.getMin());
assertEquals("max key", 11, stats.getMax());
}
// Hint 1:
// <editor-fold defaultstate="collapsed">
// Use the "downstream" overload of Collectors.groupingBy().
// </editor-fold>
// Hint 2:
// <editor-fold defaultstate="collapsed">
// Use Collectors.counting().
// </editor-fold>
/**
* Gather the words from the text file into a map, accumulating a count of
* the number of occurrences of each word. Don't worry about upper case and
* lower case. Extra challenge: implement two solutions, one that uses
* groupingBy() and the other that uses toMap().
*
* @throws IOException
*/
@Test
public void ex23_wordFrequencies() throws IOException {
//TODO//Map<String, Long> result = null;
//BEGINREMOVE
Map<String, Long> result =
reader.lines()
.flatMap(line -> WORD_PATTERN.splitAsStream(line))
.collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
// or use word -> word instead of Function.identity()
// Alternative solution using toMap():
// Map<String, Long> map =
// reader.lines()
// .flatMap(line -> WORD_PATTERN.splitAsStream(line))
// .collect(Collectors.toMap(Function.identity(),
// w -> 1L,
// Long::sum));
//ENDREMOVE
assertEquals(2L, (long)result.get("tender"));
assertEquals(6L, (long)result.get("the"));
assertEquals(1L, (long)result.get("churl"));
assertEquals(2L, (long)result.get("thine"));
assertEquals(1L, (long)result.get("world"));
assertEquals(4L, (long)result.get("thy"));
assertEquals(3L, (long)result.get("self"));
assertFalse(result.containsKey("lambda"));
}
// Hint 1:
// <editor-fold defaultstate="collapsed">
// For Collectors.groupingBy(), consider that each word needs to be in
// a category of its own, that is, each word is categorized as itself.
// </editor-fold>
// Hint 2:
// <editor-fold defaultstate="collapsed">
// For Collectors.toMap(), the first occurrence of a word should be mapped to 1.
// If two elements of the Stream are generating the same key, you will need to
// provide a merging function.
// </editor-fold>
/**
* From the words in the text file, create nested maps, where the outer map is a
* map from the first letter of the word to an inner map. (Use a string of length
* one as the key.) The inner map, in turn, is a mapping from the length of the
* word to a list of words with that length. Don't bother with any lowercasing
* or uniquifying of the words.
*
* For example, given the words "foo bar baz bazz foo" the string
* representation of the result would be:
* {b={3=[bar, baz], 4=[bazz]}, f={3=[foo, foo]}}
*
* @throws IOException
*/
@Test
public void ex24_nestedMaps() throws IOException {
//TODO//Map<String, Map<Integer, List<String>>> result = null;
//BEGINREMOVE
Map<String, Map<Integer, List<String>>> result =
reader.lines()
.flatMap(line -> WORD_PATTERN.splitAsStream(line))
.collect(Collectors.groupingBy(word -> word.substring(0,1),
Collectors.groupingBy(String::length)));
//ENDREMOVE
assertEquals("[abundance]", result.get("a").get(9).toString());
assertEquals("[by, be, by]", result.get("b").get(2).toString());
assertEquals("[flame, fresh]", result.get("f").get(5).toString());
assertEquals("[gaudy, grave]", result.get("g").get(5).toString());
assertEquals("[should, spring]", result.get("s").get(6).toString());
assertEquals("[substantial]", result.get("s").get(11).toString());
assertEquals("[the, thy, thy, thy, too, the, the, thy, the, the, the]",
result.get("t").get(3).toString());
assertEquals("[where, waste, world]", result.get("w").get(5).toString());
}
// Hint 1:
// <editor-fold defaultstate="collapsed">
// The nested map structure that's desired is the result of applying
// a "downstream" collector that's the same operation as the first-level collector.
// </editor-fold>
// Hint 2:
// <editor-fold defaultstate="collapsed">
// Both collection operations are Collectors.groupingBy().
// </editor-fold>
/**
* Given a stream of strings, accumulate (collect) them into the result string
* by inserting the input string at both the beginning and end. For example, given
* input strings "x" and "y" the result should be "yxxy". Note: the input stream
* is a parallel stream, so you MUST write a proper combiner function to get the
* correct result.
*/
@Test
public void ex25_insertBeginningAndEnd() {
Stream<String> input = Arrays.asList(
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
"k", "l", "m", "n", "o", "p", "q", "r", "s", "t")
.parallelStream();
//UNCOMMENT//String result = input.collect(null, null, null).toString();
//UNCOMMENT//// TODO fill in lambda expressions or method references
//UNCOMMENT//// in place of the nulls in the line above.
//BEGINREMOVE
String result =
input.collect(StringBuilder::new,
(sb, s) -> sb.insert(0, s).append(s),
(sb1, sb2) -> {
int half = sb2.length() / 2;
sb1.insert(0, sb2.substring(0, half));
sb1.append(sb2.substring(half));
})
.toString();
//ENDREMOVE
assertEquals("tsrqponmlkjihgfedcbaabcdefghijklmnopqrst", result);
}
// Hint 1:
// <editor-fold defaultstate="collapsed">
// The collector state (that is, the object being accumulated and
// combined) can be a single StringBuilder.
// </editor-fold>
// Hint 2:
// <editor-fold defaultstate="collapsed">
// The combiner function must take its second argument and merge
// it into the first argument, mutating the first argument.
// </editor-fold>
// <editor-fold defaultstate="collapsed">
// The second argument to the combiner function happens AFTER the first
// argument in encounter order, so the second argument needs to be split
// in half and prepended/appended to the first argument.
// </editor-fold>
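// Worked illustration (not part of the original exercise): suppose the left half
// of a parallel split accumulated "a" then "b", giving the StringBuilder "baab",
// and the right half accumulated "c" then "d", giving "dccd". The combiner splits
// the second argument in half, prepends "dc" and appends "cd", producing
// "dcbaabcd", exactly what sequential accumulation of "a", "b", "c", "d"
// would have produced.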
/**
* Count the total number of words and the number of distinct, lower case
* words in a stream, in one pass. This exercise uses a helper class
* that defines methods that are called by the Stream.collect() method.
* Your task is to fill in the implementation of the accumulate() and
* combine() methods in the helper class. You don't need to modify the
* test method itself.
*
* The stream is run in parallel, so you must write a combine() method
* that works properly.
*/
static class TotalAndDistinct {
private int count = 0;
private final Set<String> set = new HashSet<>();
// rely on implicit no-arg constructor
void accumulate(String s) {
//UNCOMMENT//// TODO write code to accumulate a single string into this object
//BEGINREMOVE
count++;
set.add(s);
//ENDREMOVE
}
void combine(TotalAndDistinct other) {
//UNCOMMENT//// TODO write code to combine the other object into this one
//BEGINREMOVE
count += other.count;
set.addAll(other.set);
//ENDREMOVE
}
int getTotalCount() { return count; }
int getDistinctCount() { return set.size(); }
}
@Test
public void ex26_countTotalAndDistinctWords() {
List<String> allWords = reader.lines()
.map(String::toLowerCase)
.flatMap(line -> WORD_PATTERN.splitAsStream(line))
.collect(Collectors.toList());
TotalAndDistinct totalAndDistinct =
Collections.nCopies(100, allWords)
.parallelStream()
.flatMap(List::stream)
.collect(TotalAndDistinct::new,
TotalAndDistinct::accumulate,
TotalAndDistinct::combine);
assertEquals("distinct count", 81, totalAndDistinct.getDistinctCount());
assertEquals("total count", 10700, totalAndDistinct.getTotalCount());
}
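// Illustrative sketch (not part of the original exercise): the same three-argument
// collect() call could be packaged as a reusable java.util.stream.Collector, e.g.
//
//   Collector<String, TotalAndDistinct, TotalAndDistinct> collector =
//       Collector.of(TotalAndDistinct::new,
//                    TotalAndDistinct::accumulate,
//                    (a, b) -> { a.combine(b); return a; });
//
// Collector.of() expects a BinaryOperator as its combiner, so the void combine()
// method is wrapped in a lambda that returns the mutated first argument.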
// ========================================================
// END OF EXERCISES
// TEST INFRASTRUCTURE IS BELOW
// ========================================================
// Pattern for splitting a string into words
static final Pattern WORD_PATTERN = Pattern.compile("[- .:,]+");
private BufferedReader reader;
@Before
public void z_setUpBufferedReader() throws IOException {
reader = Files.newBufferedReader(
Paths.get("SonnetI.txt"), StandardCharsets.UTF_8);
}
@After
public void z_closeBufferedReader() throws IOException {
reader.close();
}
}