Advent-ures

Ned Batchelder
@nedbat


http://bit.ly/bp_advent

Blah blah blah.

Advent of Code

Tonight

December 14, 2016

For example

    seed = 'foo'
    md5('foo0') → a1b2c3d4
    md5('foo1') → 9a8b7c6d
    md5('foo2') → ff0ee1dd
    md5('foo3') → a1bbb3d4     # Triple b!
                               md5('foo4') → 987abc65
                               md5('foo5') → a32cac67
                               md5('foo6') → 4e60f111
                               ...
                               md5('foo241') → 3abbbbb0
                               # 3 is a key!
    

Keep looking

    seed = 'foo'
    md5('foo0') → a1b2c3d4
    md5('foo1') → 9a8b7c6d
    md5('foo2') → ff0ee1dd
    md5('foo3') → a1bbb3d4  ✓
    md5('foo4') → 987abc65
    md5('foo5') → a32cac67
    md5('foo6') → 4e60f111     # Triple 1!
                               md5('foo7') → c1bd8e78
                               ...
                               md5('foo1006') → f5e02b34
                               # 6 is not a key....
    

Keep looking

    seed = 'foo'
    md5('foo0') → a1b2c3d4
    md5('foo1') → 9a8b7c6d
    md5('foo2') → ff0ee1dd
    md5('foo3') → a1bbb3d4  ✓
    md5('foo4') → 987abc65
    md5('foo5') → a32cac67
    md5('foo6') → 4e60f111
    md5('foo7') → c1bd8e78
    md5('foo8') → d8a9112f
    ...
    # until we find the 64th key
    

Part 1

Hashing

part1.py
    def salted_hash(salt, i):
        bytes = f"{salt}{i}".encode("ascii")
        return hashlib.md5(bytes).hexdigest()
    
    >>> salted_hash("something", 17)
    '594a263646d6b3c5db804b28f62cbc67'
    

Finding triples

part1.py
    def triple(s):
        # Return a triple character, or None.
        m = re.search(r"(.)\1\1", s)
        if m:
            return m.group(1)
    
    >>> triple("what aaa something")
    'a'
    >>> triple("hello world")
    >>> triple("111 22222 333")
    '1'
    

Finding keys

part1.py
    def is_key(salt, candidate):
        # Is `candidate` a key?
        h = salted_hash(salt, candidate)
        t = triple(h)
        if t is None:
            return False

        # We have a triple. Look ahead for a quint.
        for check_ahead in range(1, 1001):
            h2 = salted_hash(salt, candidate + check_ahead)
            if t*5 in h2:
                return True

        return False
    

The 64th key

part1.py
    def nth_key(salt, n):
        # Find the `n`th key starting with `salt`.
        num_keys = 0
        for candidate in itertools.count():
            if is_key(salt, candidate):
                num_keys += 1
                if num_keys == n:
                    return candidate
    

The answer

part1.py
    INPUT = 'zpqevtbw'  # Yours will be different!
    k64 = nth_key(INPUT, 64)
    print(f"Part 1: the 64th key is at index {k64}")
    
    $ python part1.py
    Part 1: the 64th key is at index 16106
    

Testing

Testing

part1.py
    def test_triple():
        assert triple("helloaaathere") == "a"
    
    $ pytest part1.py
    ================ test session starts ==================
    platform darwin -- Python 3.6.5, pytest-4.0.1, py-1.7.0
    rootdir: /Users/ned/prz/adventures, inifile:
    collected 1 item

    part1.py .                                     [100%]

    ============== 1 passed in 0.01 seconds ===============
    

Many test cases

part1.py
    @pytest.mark.parametrize("s, t", [
        ("hello there all", None),
        ("aaa", "a"),
        ("0123345xxx112315zzz124xx", "x"),
    ])
    def test_triples(s, t):
        assert triple(s) == t
    
    $ pytest part1.py
    ================= test session starts =================
    platform darwin -- Python 3.6.5, pytest-4.0.1, py-1.7.0, pluggy-0.8.0
    rootdir: /Users/ned/prz/adventures, inifile:
    collected 4 items

    part1.py ....                                   [100%]

    ============== 4 passed in 0.01 seconds ===============
    

Test cases!

part1.py
    @pytest.mark.parametrize("salt, num, result", [
        ('abc', 17, False),         # no triple
        ('abc', 18, False),         # triple, no quint
        ('abc', 39, True),          # a key
        ('abc', 92, True),          # a key
        ('abc', 22727, False),      # near a key
        ('abc', 22728, True),       # a key
        ('abc', 22729, False),      # near a key
    ])
    def test_is_key(salt, num, result):
        assert is_key(salt, num) == result
    
part1.py
    def test_nth_key():
        assert nth_key("abc", 64) == 22728
    
    collected 12 items

    part1.py ............                           [100%]
    
    $ pytest part1.py
    ================= test session starts =================
    collected 12 items

    part1.py ..F.........                           [100%]

    ====================== FAILURES =======================
    _________________ test_triples[aaa-b] _________________

    s = 'aaa', t = 'b'

        @pytest.mark.parametrize("s, t", [
            ("hello there all", None),
            ("aaa", "b"),
            ("0123345xxx112315zzz124xx", "x"),
        ])
        def test_triples(s, t):
    >       assert triple(s) == t
    E       AssertionError: assert 'a' == 'b'
    E         - a
    E         + b

    part1.py:28: AssertionError
    ========= 1 failed, 11 passed in 3.80 seconds =========
    

Part 2

Part 2

Are we OK?

    $ time python part1.py
    Part 1: the 64th key is at index 16106

    real    0m2.945s
    

naive_part2.py

naive_part2.py
    def salted_hash_2017(salt, i):
        val = f"{salt}{i}"
        for _ in range(2017):
            val = hashlib.md5(val.encode("ascii")).hexdigest()
        return val
    
    $ time python naive_part2.py
    Part 2: the 64th key is at index 22423

    real	86m33.516s
    

The problem

part1_counted.py
    HASHES = 0

    def salted_hash(salt, i):
        global HASHES
        HASHES += 1
        bytes = f"{salt}{i}".encode("ascii")
        return hashlib.md5(bytes).hexdigest()
    
    $ python part1_counted.py
    Part 1: the 64th key is at index 16106
    Total of 1,661,356 hashes
    

Caching results

A cache dict

gcache.py
    CACHE = {}

    def salted_hash_2017(salt, i):
        val = val0 = f"{salt}{i}"
        if val0 in CACHE:
            return CACHE[val0]
        for _ in range(2017):
            val = hashlib.md5(val.encode("ascii")).hexdigest()
        CACHE[val0] = val
        return val
    
    $ time python gcache.py
    Part 2: the 64th key is at index 22423

    real	0m54.688s
    

Local cache

lcache.py
    def make_cached_hasher(iterations):
        cache = {}

        def hasher(salt, i):
            val = val0 = f"{salt}{i}"
            if val0 in cache:
                return cache[val0]
            for _ in range(iterations):
                val = hashlib.md5(val.encode("ascii")).hexdigest()
            cache[val0] = val
            return val

        return hasher

    salted_hash_2017 = make_cached_hasher(2017)
    

Separating cache from function

cache_decorator.py
    def cached(func):
        cache = {}
        def wrapped(*args):
            if args in cache:
                return cache[args]
            ret = func(*args)
            cache[args] = ret
            return ret
        return wrapped

    @cached
    def salted_hash_2017(salt, i):      # from naive_part2.py
        val = f"{salt}{i}"
        for _ in range(2017):
            val = hashlib.md5(val.encode("ascii")).hexdigest()
        return val
    

Caching

More iteration

Iterables and iterators

    >>> nums = [1, 2, 3]
    >>> for num in nums:
    ...     print(num)

    1
    2
    3
    
    >>> num_iter = iter(nums)
    >>> next(num_iter)
    1
    >>> next(num_iter)
    2
    >>> next(num_iter)
    3
    

Generators

    >>> def countdown(num):
    ...     while num >= 0:
    ...         yield num
    ...         num -= 1

    >>> for n in countdown(3):
    ...     print(n)

    3
    2
    1
    0
    

Iterating hashes directly

peekable.py
    def hashes_2017(salt):
        for i in itertools.count():
            yield salted_hash_2017(salt, i)

    def test_hashes_2017():
        them = hashes_2017("abc")
        assert next(them) == 'a107ff634856bb300138cac6568c0f24'
        assert next(them) == '65490b7e1ceeff8ade55d803c02bf553'
        assert next(them) == 'dff53117a11213b0e5c4cf63fe3c5198'
    

Main iteration plus peeking ahead

    md5('foo0') → a1b2c3d4
    md5('foo1') → 9a8b7c6d
    md5('foo2') → ff0ee1dd
    md5('foo3') → a1bbb3d4     # Triple b!
                               md5('foo4') → 987abc65
                               md5('foo5') → a32cac67
                               md5('foo6') → 4e60f111
                               md5('foo7') → c1bd8e78
                               md5('foo8') → b2b9a2e9
                               ...
    md5('foo4') → 987abc65
    md5('foo5') → a32cac67
    md5('foo6') → 4e60f111     # Triple 1!
                               md5('foo7') → c1bd8e78
                               ...
    

Peekability

peekable.py
    def test_peekable():
        p = PeekableIterable(itertools.count())
        pi = iter(p)

        assert next(pi) == 0
        assert next(pi) == 1
        assert p.peek_ahead(1) == 2
        assert p.peek_ahead(100) == 101

        assert next(pi) == 2
        assert p.peek_ahead(1) == 3
    
peekable.py
    class PeekableIterable:
        """Peek ahead into infinite iterables"""
        def __init__(self, source):
            self.source = iter(source)
            self.lookahead = collections.deque()

        def __iter__(self):
            while True:
                if self.lookahead:
                    yield self.lookahead.popleft()
                else:
                    yield next(self.source)

        def peek_ahead(self, index):
            assert index > 0
            while index > len(self.lookahead):
                self.lookahead.append(next(self.source))
            return self.lookahead[index-1]
    

Iterating keys

peekable.py
    def key_indexes(salt):
        """Produce all key indexes from `salt`."""
        p = PeekableIterable(hashes_2017(salt))
        for index, hash in enumerate(p):
            t = triple(hash)
            if t:
                for i in range(1, 1001):
                    ahead = p.peek_ahead(i)
                    if t*5 in ahead:
                        yield index

    def test_key_indexes():
        ki = key_indexes("abc")
        assert next(ki) == 10
    

Finishing part 2

peekable.py
    def n_keys(salt, n):
        all_keys = key_indexes(salt)
        return list(itertools.islice(all_keys, n))

    if __name__ == "__main__":
        INPUT = 'zpqevtbw'  # Yours will be different!
        first_64 = n_keys(INPUT, 64)
        k64 = first_64[-1]
        print(f"Part 2: the 64th key is at index {k64}")
    

Questions?

@nedbat

http://bit.ly/bp_advent