1/43

Advent-ures

Ned Batchelder
@nedbat


http://bit.ly/bp_advent

Blah blah blah.

Advent of Code

Tonight

December 14, 2016

For example

seed = 'foo'
md5('foo0') → a1b2c3d4
md5('foo1') → 9a8b7c6d
md5('foo2') → ff0ee1dd
md5('foo3') → a1bbb3d4     # Triple b!
                           md5('foo4') → 987abc65
                           md5('foo5') → a32cac67
                           md5('foo6') → 4e60f111
                           ...
                           md5('foo241') → 3abbbbb0
                           # 3 is a key!

Keep looking

seed = 'foo'
md5('foo0') → a1b2c3d4
md5('foo1') → 9a8b7c6d
md5('foo2') → ff0ee1dd
md5('foo3') → a1bbb3d4  ✓
md5('foo4') → 987abc65
md5('foo5') → a32cac67
md5('foo6') → 4e60f111     # Triple 1!
                           md5('foo7') → c1bd8e78
                           ...
                           md5('foo1006') → f5e02b34
                           # 6 is not a key....

Keep looking

seed = 'foo'
md5('foo0') → a1b2c3d4
md5('foo1') → 9a8b7c6d
md5('foo2') → ff0ee1dd
md5('foo3') → a1bbb3d4  ✓
md5('foo4') → 987abc65
md5('foo5') → a32cac67
md5('foo6') → 4e60f111
md5('foo7') → c1bd8e78
md5('foo8') → d8a9112f
...
# until we find the 64th key

Part 1

Hashing

part1.py
def salted_hash(salt, i):
    bytes = f"{salt}{i}".encode("ascii")
    return hashlib.md5(bytes).hexdigest()
>>> salted_hash("something", 17)
'594a263646d6b3c5db804b28f62cbc67'

Finding triples

part1.py
def triple(s):
    # Return a triple character, or None.
    m = re.search(r"(.)\1\1", s)
    if m:
        return m.group(1)
>>> triple("what aaa something")
'a'
>>> triple("hello world")
>>> triple("111 22222 333")
'1'

Finding keys

part1.py
def is_key(salt, candidate):
    # Is `candidate` a key?
    h = salted_hash(salt, candidate)
    t = triple(h)
    if t is None:
        return False
 
    # We have a triple. Look ahead for a quint.
    for check_ahead in range(1, 1001):
        h2 = salted_hash(salt, candidate + check_ahead)
        if t*5 in h2:
            return True
 
    return False

The 64th key

part1.py
def nth_key(salt, n):
    # Find the `n`th key starting with `salt`.
    num_keys = 0
    for candidate in itertools.count():
        if is_key(salt, candidate):
            num_keys += 1
            if num_keys == n:
                return candidate

The answer

part1.py
INPUT = 'zpqevtbw'  # Yours will be different!
k64 = nth_key(INPUT, 64)
print(f"Part 1: the 64th key is at index {k64}")
$ python part1.py
Part 1: the 64th key is at index 16106

Testing

Testing

part1.py
def test_triple():
    assert triple("helloaaathere") == "a"
$ pytest part1.py
================ test session starts ==================
platform darwin -- Python 3.6.5, pytest-4.0.1, py-1.7.0
rootdir: /Users/ned/prz/adventures, inifile:
collected 1 item
 
part1.py .                                     [100%]
 
============== 1 passed in 0.01 seconds ===============

Many test cases

part1.py
@pytest.mark.parametrize("s, t", [
    ("hello there all", None),
    ("aaa", "a"),
    ("0123345xxx112315zzz124xx", "x"),
])
def test_triples(s, t):
    assert triple(s) == t
$ pytest part1.py
================= test session starts =================
platform darwin -- Python 3.6.5, pytest-4.0.1, py-1.7.0, pluggy-0.8.0
rootdir: /Users/ned/prz/adventures, inifile:
collected 4 items
 
part1.py ....                                   [100%]
 
============== 4 passed in 0.01 seconds ===============

Test cases!

part1.py
@pytest.mark.parametrize("salt, num, result", [
    ('abc', 17, False),         # no triple
    ('abc', 18, False),         # triple, no quint
    ('abc', 39, True),          # a key
    ('abc', 92, True),          # a key
    ('abc', 22727, False),      # near a key
    ('abc', 22728, True),       # a key
    ('abc', 22729, False),      # near a key
])
def test_is_key(salt, num, result):
    assert is_key(salt, num) == result
part1.py
def test_nth_key():
    assert nth_key("abc", 64) == 22728
collected 12 items
 
part1.py ............                           [100%]
$ pytest part1.py
================= test session starts =================
collected 12 items
 
part1.py ..F.........                           [100%]
 
====================== FAILURES =======================
_________________ test_triples[aaa-b] _________________
 
s = 'aaa', t = 'b'
 
    @pytest.mark.parametrize("s, t", [
        ("hello there all", None),
        ("aaa", "b"),
        ("0123345xxx112315zzz124xx", "x"),
    ])
    def test_triples(s, t):
>       assert triple(s) == t
E       AssertionError: assert 'a' == 'b'
E         - a
E         + b
 
part1.py:28: AssertionError
========= 1 failed, 11 passed in 3.80 seconds =========

Part 2

Part 2

Are we OK?

$ time python part1.py
Part 1: the 64th key is at index 16106
 
real    0m2.945s

naive_part2.py

naive_part2.py
def salted_hash_2017(salt, i):
    val = f"{salt}{i}"
    for _ in range(2017):
        val = hashlib.md5(val.encode("ascii")).hexdigest()
    return val
$ time python naive_part2.py
Part 2: the 64th key is at index 22423
 
real	86m33.516s

The problem

part1_counted.py
HASHES = 0
 
def salted_hash(salt, i):
    global HASHES
    HASHES += 1
    bytes = f"{salt}{i}".encode("ascii")
    return hashlib.md5(bytes).hexdigest()
$ python part1_counted.py
Part 1: the 64th key is at index 16106
Total of 1,661,356 hashes

Caching results

A cache dict

gcache.py
CACHE = {}
 
def salted_hash_2017(salt, i):
    val = val0 = f"{salt}{i}"
    if val0 in CACHE:
        return CACHE[val0]
    for _ in range(2017):
        val = hashlib.md5(val.encode("ascii")).hexdigest()
    CACHE[val0] = val
    return val
$ time python gcache.py
Part 2: the 64th key is at index 22423
 
real	0m54.688s

Local cache

lcache.py
def make_cached_hasher(iterations):
    cache = {}
 
    def hasher(salt, i):
        val = val0 = f"{salt}{i}"
        if val0 in cache:
            return cache[val0]
        for _ in range(iterations):
            val = hashlib.md5(val.encode("ascii")).hexdigest()
        cache[val0] = val
        return val
 
    return hasher
 
salted_hash_2017 = make_cached_hasher(2017)

Separating cache from function

cache_decorator.py
def cached(func):
    cache = {}
    def wrapped(*args):
        if args in cache:
            return cache[args]
        ret = func(*args)
        cache[args] = ret
        return ret
    return wrapped
 
@cached
def salted_hash_2017(salt, i):      # from naive_part2.py
    val = f"{salt}{i}"
    for _ in range(2017):
        val = hashlib.md5(val.encode("ascii")).hexdigest()
    return val

Caching

More iteration

Iterables and iterators

>>> nums = [1, 2, 3]
>>> for num in nums:
...     print(num)
 
1
2
3
>>> num_iter = iter(nums)
>>> next(num_iter)
1
>>> next(num_iter)
2
>>> next(num_iter)
3

Generators

>>> def countdown(num):
...     while num >= 0:
...         yield num
...         num -= 1
 
>>> for n in countdown(3):
...     print(n)
 
3
2
1
0

Iterating hashes directly

peekable.py
def hashes_2017(salt):
    for i in itertools.count():
        yield salted_hash_2017(salt, i)
 
def test_hashes_2017():
    them = hashes_2017("abc")
    assert next(them) == 'a107ff634856bb300138cac6568c0f24'
    assert next(them) == '65490b7e1ceeff8ade55d803c02bf553'
    assert next(them) == 'dff53117a11213b0e5c4cf63fe3c5198'

Main iteration plus peeking ahead

md5('foo0') → a1b2c3d4
md5('foo1') → 9a8b7c6d
md5('foo2') → ff0ee1dd
md5('foo3') → a1bbb3d4     # Triple b!
                           md5('foo4') → 987abc65
                           md5('foo5') → a32cac67
                           md5('foo6') → 4e60f111
                           md5('foo7') → c1bd8e78
                           md5('foo8') → b2b9a2e9
                           ...
md5('foo4') → 987abc65
md5('foo5') → a32cac67
md5('foo6') → 4e60f111     # Triple 1!
                           md5('foo7') → c1bd8e78
                           ...

Peekability

peekable.py
def test_peekable():
    p = PeekableIterable(itertools.count())
    pi = iter(p)
 
    assert next(pi) == 0
    assert next(pi) == 1
    assert p.peek_ahead(1) == 2
    assert p.peek_ahead(100) == 101
 
    assert next(pi) == 2
    assert p.peek_ahead(1) == 3
peekable.py
class PeekableIterable:
    """Peek ahead into infinite iterables"""
    def __init__(self, source):
        self.source = iter(source)
        self.lookahead = collections.deque()
 
    def __iter__(self):
        while True:
            if self.lookahead:
                yield self.lookahead.popleft()
            else:
                yield next(self.source)
 
    def peek_ahead(self, index):
        assert index > 0
        while index > len(self.lookahead):
            self.lookahead.append(next(self.source))
        return self.lookahead[index-1]

Iterating keys

peekable.py
def key_indexes(salt):
    """Produce all key indexes from `salt`."""
    p = PeekableIterable(hashes_2017(salt))
    for index, hash in enumerate(p):
        t = triple(hash)
        if t:
            for i in range(1, 1001):
                ahead = p.peek_ahead(i)
                if t*5 in ahead:
                    yield index
 
def test_key_indexes():
    ki = key_indexes("abc")
    assert next(ki) == 10

Finishing part 2

peekable.py
def n_keys(salt, n):
    all_keys = key_indexes(salt)
    return list(itertools.islice(all_keys, n))
 
if __name__ == "__main__":
    INPUT = 'zpqevtbw'  # Yours will be different!
    first_64 = n_keys(INPUT, 64)
    k64 = first_64[-1]
    print(f"Part 2: the 64th key is at index {k64}")

Questions?

@nedbat

http://bit.ly/bp_advent