ts-python

Python @ Two Sigma (discussion)

Turn notebook cells into functions, the right way!

Theme: Python fundamentals

Keywords: functions, *args, **kwargs, functools.wraps, lambda; inspect.signature; decorators; sharing code, collaboration, moving from notebooks to scripts

Presenter James Powell james@dutc.io
Date Friday, December 4, 2020
Time 3:30 PM PST

Notes: https://ts-python.dutc.io

tmate: https://tmate.io/t/dutc/ts-python

print('Good afternoon!')
print("Let's get started!")

Questions

xs = [1, 2, 3]
t  =  1, 2, 3
t  = (1, 2, 3)

# `tuple` - immutable
from collections.abc import Sequence
t[0]
t[0:10]
xs[0]
xs[0:10]
s  = {1, 2, 3}
t  = (1, 2, 3)
xs = [1, 2, 3]

a = {1, 2, 3}
b = {1, 2, 3}
a.union(b)
a.intersection(b)
a.difference(b)
a.symmetric_difference(b)
1 in s
# human-ordered
xs = [1, 2, 3]
t = 1, 2, 3

# machine-ordered
s = {'one', 'two', 'three'}
for x in s:
    print(f'{x = }')
print('-' * 10)
for x in s:
    print(f'{x = }')
# order of the elements connotes what the elements mean
# `tuple`: record
# - heterogeneous
person = 'Craig', 'John', 21, True
person = 'John', 'Craig', 21, False
...
# a record is unpacked by position, so every field needs a target;
# `*_` collects the trailing fields we do not use here
name, surname, *_ = person

# order of the elements connotes the order in which we process them
# `list`: collection
# - homogeneous
people = ['John Craig', 'Craig John']
people = ['Craig Jonh', 'John Craig']
...
for p in people:
    promote(p)
from collections import namedtuple
Person = namedtuple('Person', 'name surname')

people = [
    Person('Craig',   'John'),
    Person('John',    'Craig'),
    Person('Lingxue', 'Zhang'),
    Person('Jingbin', 'Li'),
]

for name, surname in people:
    ...

for p in people:
    print(f'I work with {p[0]}')

for p in people:
    print(f'I work with {p.name}')
people = [
    {'name': 'Craig',   'surname': 'John'},
    {'name': 'John',    'surname': 'Craig'},
    {'name': 'Lingxue', 'surname': 'Zhang'},
    {'name': 'Jingbin', 'surname': 'Li'},
]

for p in people:
    p['name']
simulations = [
    ('tickers.csv', 'data.csv', 'output.csv'),
]

from collections import namedtuple
from pathlib import Path
Sim = namedtuple('Sim', 'metadata signal output')
class Sim(namedtuple('Sim', 'metadata signal output')):
    ''' a (metadata, signal, output) path triple, validated when created

    Raises `ValueError` if either input path is missing or if the output
    path already exists.
    '''
    def __new__(cls, metadata, signal, output):
        # inputs are checked in field order, and must already exist
        for label, path in (('metadata', metadata), ('signal', signal)):
            if not Path(path).exists():
                raise ValueError(f'{label} file does not exist: {path!r}')
        # an output path that is already present is rejected
        if Path(output).exists():
            raise ValueError(f'output file already exists: {output!r}')
        return super().__new__(cls, metadata, signal, output)

simulations = [
    Sim('tickers.csv', 'data.csv', 'output.csv'),
]

simulations = [
    {
        'metadata': 'tickers.csv',
        'signal': 'data.csv',
        'output': 'output.csv',
    },
]

check_valid_files(...)
from pathlib import Path
class Sim:
    ''' a simulation's metadata, signal and output paths, validated when created

    Raises `ValueError` if either input path is missing or if the output
    path already exists.
    '''
    def __init__(self, metadata, signal, output):
        # inputs are checked in argument order, and must already exist
        for label, path in (('metadata', metadata), ('signal', signal)):
            if not Path(path).exists():
                raise ValueError(f'{label} file does not exist: {path!r}')
        # an output path that is already present is rejected
        if Path(output).exists():
            raise ValueError(f'output file already exists: {output!r}')
        self.metadata = metadata
        self.signal = signal
        self.output = output
    def __repr__(self):
        # spell out each field so `print(x)` shows what the instance holds
        return (
            f'{type(self).__name__}('
            f'metadata={self.metadata!r}, '
            f'signal={self.signal!r}, '
            f'output={self.output!r})'
        )
x = Sim('/etc/hostname', '/etc/hostname', '/tmp/output.csv')
print(x)
from dataclasses import dataclass
from pathlib import Path

# `@dataclass` generates `__init__`, `__repr__` and `__eq__` from the
# annotated fields below.
# NOTE: the annotations are not enforced at runtime; whatever is passed in
#       (e.g., a plain `str`) is stored as-is, without conversion to `Path`
@dataclass
class Sim:
    metadata : Path
    signal   : Path
    output   : Path

x = Sim('/etc/hostname', '/etc/hostname', '/tmp/output.csv')
print(x)
print(x.__dict__)
from time import sleep, perf_counter
from random import random
measurements = []

def timed(f):
    ''' call `f` once, immediately, and record how long that call took

    The elapsed `perf_counter` time is appended to the module-level
    `measurements` list; nothing is returned.
    '''
    started = perf_counter()
    f()
    elapsed = perf_counter() - started
    measurements.append(elapsed)

from functools import wraps

def timed(f):
    ''' decorate `f` so that every call records its duration

    Each call appends the elapsed `perf_counter` time to the module-level
    `measurements` list and passes `f`'s return value through unchanged.
    `wraps` keeps `f`'s name and docstring on the wrapper.
    '''
    @wraps(f)
    def wrapper(*args, **kwargs):
        started = perf_counter()
        result = f(*args, **kwargs)
        finished = perf_counter()
        measurements.append(finished - started)
        return result
    return wrapper

#  @timed
def slow(msg):
    ''' sleep for a random fraction of a second, then print `msg` '''
    pause = random()
    sleep(pause)
    print(f'{msg = }')
help(slow)
#  slow = create_timed(slow)

@timed
def fast():
    ''' sleep for a hundredth of a second '''
    sleep(0.01)

slow('first')  # i.
slow('second') # ii.
slow('third')  # iii.
slow('fourth') # iv.

print(f'{measurements = }')
# - function wrap behaviour
#   - instrumentation (e.g., timing)
#   - authorisation, authentication
# - documentation & registration
from itertools import product
from collections import namedtuple

class Strategy(namedtuple('Strategy', 'func title example')):
    ''' a registered strategy: the callable plus its descriptive fields '''
    def __new__(cls, func, title=None, example=None):
        # an untitled strategy is listed under its function's name
        if title is None:
            title = func.__name__
        return super().__new__(cls, func, title, example)

# every registered strategy, keyed by the function itself
strats = {}

def register(*args, **kwargs):
    ''' decorator factory: record the decorated function in `strats`

    `args` and `kwargs` are forwarded to `Strategy` after the function
    itself (e.g., `title=...`, `example=...`). The function is returned
    unchanged, so the decorated name stays callable.
    '''
    def dec(func):
        # `func` goes first so positional extras fill `title`, `example`
        strats[func] = Strategy(func, *args, **kwargs)
        # a decorator's return value replaces the decorated name
        return func
    return dec

# placeholder strategies: every body is just `pass`

# `example=...` passes the `Ellipsis` object as a stand-in value
@register(
    title="James's Great Strat",
    example=...,
)
def jp_strategy():
    pass

# no `example` keyword is passed for this one
@register(
    title="Craig's Great Strat"
)
def cr_strategy():
    pass

# NOTE: the four strategies below are not decorated with `@register`
def dd_strategy():
    pass

def jl_strategy():
    pass

def sa_strategy():
    pass

def td_strategy():
    pass

if __name__ == '__main__':
    for player_a, player_b in product(strats, strats):
        print(f'{strats[player_a]} vs {strats[player_b]}')
def f():
    # `helper` is defined inside `f`, so the name exists only within this body
    def helper():
        ...
        ...
        ...
    # the same local helper is invoked twice, with other work (`...`) between
    helper()
    ...
    helper()
    ...
from pandas import DataFrame
from numpy.random import normal
df1 = DataFrame({
    'a': normal(size=4),
    'b': normal(size=4),
})
df2 = DataFrame({
    'a': normal(size=4),
    'b': normal(size=4),
})

from collections import namedtuple
class Sim(namedtuple('Sim', 'x y')):
    ''' two inputs, each stored as a `Subset` (original plus filtered rows) '''
    class Subset(namedtuple('Subset', 'orig subsetted')):
        ''' a frame paired with its rows where column `a` <= column `b` '''
        @classmethod
        def from_df(cls, df):
            ''' build a `Subset` from `df`, keeping `df` itself as `orig` '''
            keep = df['a'] <= df['b']
            return cls(orig=df, subsetted=df[keep])

    @classmethod
    def from_inputs(cls, x, y):
        ''' wrap both inputs in a `Subset` and bundle them as a `Sim` '''
        subset_x = cls.Subset.from_df(x)
        subset_y = cls.Subset.from_df(y)
        return cls(x=subset_x, y=subset_y)

sim = Sim.from_inputs(df1, df2)
print(f'{sim.x.orig = }')
from numpy import array
from numpy.linalg import det

array([[1, 2, 3]
DataFrame.xs
DataFrame.apply
DataFrame.unstack
DataFrame.stack
DataFrame.pivot
DataFrame.melt
DataFrame.groupby
DataFrame.resample
DataFrame.rolling
def process(df):
    # NOTE(review): `CleanedInput`, `normalize` and `compute` are not defined
    #               in the visible notes; presumably this is a sketch, confirm
    # the two results below are bound to names but not used afterwards
    c_df = CleanedInput.from_df(df)
    n_df = df.normalize()
    # the original `df` (not `c_df` / `n_df`) is what gets passed along here
    normalize(df)
    compute(df)

Exercise: Fizzbuzz

The game “fizzbuzz” is played as follows:

A sample game may look like this:

Task: Write a function that gives the first N values from such a game.

def fizzbuzz(n):
    pass

Task: Write a test for the above.

Task: Extend the above to generalise the divisors. e.g., allow the game to be played with ‘fizz’, ‘buzz’, and ‘quux’ for 3, 5, and 7.

Task: Write search below to search within the fizzbuzz sequence for particular patterns.

# NOTE: feel free to redesign the grammar for your pattern.

def search(pattern, *, max_value=100_000):
    ''' search for a pattern within the fizzbuzz sequence up to max_value
    
    Use the following (sample) grammar:
    - word: match this exact word (e.g., fizz, buzz, fizzbuzz)
    - word₁|word₂: match word₁ OR word₂
    - *: match any word
    - #: match only a number
    '''
    pass

# look for four sequential plays that are
#   fizz, then fizz OR buzz, then anything, then fizzbuzz
pattern = ['fizz', 'fizz|buzz', '*', 'fizzbuzz']

Exercise: Rock, Paper, Scissors

The game “Rock, Paper, Scissors” is played as follows:

Task: write a function to evaluate the rules of the game.

# NOTE: for naming & design purposes, you may assume the players are directional
#       i.e., `a` is the Player
#             `b` is the Challenger
#       e.g., `rules` could return "player wins" or "player loses"
#              or it could return "player wins" vs "challenger wins"
# QUESTION: how do you represent ties?
def rules(a, b):
    ''' return who wins, given shapes played by two players a and b '''
    pass

Task: write a framework that can evaluate a strategy and play the game for 10,000 rounds given a pairing of strategies.

from random import choice

def random_strategy():
    ''' randomly select a shape

    Returns one of the one-letter shapes 'r', 'p' or 's'.
    '''
    # choose among the characters of the string; `choice(['rps'])` would
    # pick from a one-element list and always return 'rps'
    return choice('rps')

# other sample strategies…

# QUESTION: how do we track "history" here?
def beat_previous_play():
    ''' select the shape that would beat the opponent's previous play '''
    pass

def most_common_play(n=3):
    ''' select the most common shape from the opponent's previous N plays '''
    pass

games = [(random_strategy(), random_strategy()) for _ in range(10_000)]
results = [rules(a, b) for a, b in games]

Sample Solution (from a past attendee)

from random import choice
from collections import Counter, deque

class Game:
    """ Rock, Paper, Scissors bookkeeping for two players (ids 0 and 1)

    Shapes are the one-letter strings "r", "p" and "s". The last `memory`
    plays of each player are kept so strategies can look at the opponent.
    """

    def __init__(self, memory=5):
        # fifo, most recent to the left
        self.histories = [
            deque([], maxlen=memory),
            deque([], maxlen=memory)
        ]
        # maps each shape to the shape that defeats it
        self.what_beats_key = {"r": "p", "s": "r", "p": "s"}

    def beat_previous_play(self, player_id):
        """ select the shape that would beat the opponent's previous play

        Falls back to a random shape while the opponent has no history.
        """
        # `not player_id` flips 0 <-> 1, which indexes the other player
        opponents_history = self.histories[not player_id]
        if not opponents_history:
            return self.random_strategy()
        return self.what_beats_key[opponents_history[0]]

    def most_common_play(self, player_id, n=3):
        """ select the most common shape from the opponent's previous N plays

        Falls back to a random shape when there is nothing to count (no
        history yet, or `n` is 0), as `beat_previous_play` does.
        """
        opponents_history = self.histories[not player_id]
        # most recent plays sit on the left, so a prefix is the latest N
        opponents_recent_history = list(opponents_history)[:n]
        if not opponents_recent_history:
            return self.random_strategy()
        counts = Counter(opponents_recent_history)
        return counts.most_common(1)[0][0]

    def random_strategy(self):
        """ randomly select a shape """
        return choice(["r", "p", "s"])

    def show_hands(self, player_shape, challenger_shape):
        """ record one round in both histories and return the two shapes """
        self.histories[0].appendleft(player_shape)
        self.histories[1].appendleft(challenger_shape)
        return player_shape, challenger_shape

    def rules(self, a: str, b: str) -> str:
        """
            Returns who wins, given shapes played by two players a and b
            a: Player
            b: Challenger
            Returns one of:
                "1": Player wins
                "X": Tie
                "2": Challenger wins
            Returns None if either shape is not "r", "p" or "s".
        """
        shapes = ("r", "p", "s")

        # unknown shapes cannot be scored
        if a not in shapes or b not in shapes:
            return None

        # identical shapes tie
        if a == b:
            return "X"

        # the challenger wins only by holding the shape that beats `a`
        return "2" if b == self.what_beats_key[a] else "1"

g = Game()

games = [g.show_hands(g.random_strategy(), g.beat_previous_play(1)) for _ in range(10_000)]
results = [g.rules(a, b) for a, b in games]
ranking = Counter(results)
print("1: Player 1 wins, X: tie, 2: Player 2 wins.")
print(f"{ranking = }")

Another Sample Solution

from enum import Enum, auto
from random import choice
from itertools import combinations, product, count
from collections import defaultdict, Counter, deque

class Shape(Enum):
    Rock     = auto()
    Paper    = auto()
    Scissors = auto()

class Winner(Enum):
    Player     = auto()
    Challenger = auto()

beats = {
    Shape.Rock:    Shape.Scissors,
    Shape.Scissors:   Shape.Paper,
    Shape.Paper:       Shape.Rock,
}
beaten_by = {v:k for k,v in beats.items()}

def game(a : Shape, b : Shape):
    ''' decide one round: `a` is the player's shape, `b` the challenger's

    Returns `Winner.Player` or `Winner.Challenger`; returns `None` when
    neither shape beats the other.
    '''
    if b == beats[a]:
        return Winner.Player
    if b == beaten_by[a]:
        return Winner.Challenger
    return None

# maps each registered strategy starter to its display name
STRATEGIES = {}
def strategy(name):
    ''' decorator factory: register a generator-based strategy as `name`

    The decorated name becomes a starter: calling it creates a fresh
    generator and returns that generator's bound `send` method.
    '''
    def decorator(make_generator):
        def start(*args, **kwargs):
            generator = make_generator(*args, **kwargs)
            return generator.send
        STRATEGIES[start] = name
        return start
    return decorator

@strategy('dumb & random')
def random_strategy():
    ''' an endless stream of uniformly random shapes; sent values are ignored '''
    while True:
        yield choice([*Shape])

@strategy('smarter')
def most_common(n=10):
    ''' play the shape seen most often among the last `n` plays sent in

    Generator protocol: prime with `send(None)`, then `send` the opponent's
    latest play to receive this strategy's next play. A random shape is
    played until at least one opponent play has been seen.
    '''
    history = deque(maxlen=n)
    # priming step: there is no play to answer yet
    last_play = yield
    while True:
        # remember what was sent in, or the history stays empty forever;
        # `None` is skipped because it carries no play
        if last_play is not None:
            history.append(last_play)
        if not history:
            play = choice([*Shape])
        else:
            play = Counter(history).most_common(1)[0][0]
        last_play = yield play

if __name__ == '__main__':
    ROUNDS = 10_000
    results = defaultdict(Counter)
    # pair every registered strategy with every other one, itself included
    for p_st, c_st in product(STRATEGIES, STRATEGIES):
        ps = p_st()
        cs = c_st()
        # prime both generators: `p` comes from the player, `c` from the
        # challenger, so the first round feeds each side the other's value
        p, c = ps(None), cs(None)
        # each round the player answers the challenger's previous play, then
        # the challenger answers the play the player just made
        games = ((p := ps(c), c := cs(p)) for _ in range(ROUNDS))
        results[p_st, c_st].update(game(p, c) for p, c in games)

    # report the tally for each pairing; outcomes are sorted by their text
    for (p_st, c_st), res_cntr in results.items():
        print(f'Player:     {STRATEGIES[p_st]}')
        print(f'Challenger: {STRATEGIES[c_st]}')
        for res, cnt in sorted(res_cntr.items(), key=str):
            print(f'    {res!s:<20} {cnt:,}')