ts-python

Seminar II: “Artists and Models” (the Python Object Model)

Abigail ‘Abby’ Parker (Dorothy Malone): I’m doing an advertising layout. So if you’ll excuse me, I’ll get my models and get to work.

Richard ‘Rick’ Todd (Dean Martin): Models?

Abby: Yes, models. You understand. They’re people who pose.

Artists and Models (1955)

Date Time Track Meeting Link
June 4, 2021 11:00 AM EST Better Use of Python Seminar II: “Artists and Models”

Contents

Audience

These sessions are designed for a broad audience of modelers and software programmers of all backgrounds and skill-levels.

Our expected audience should comprise attendees with a…

… or greater!

During this session, we will endeavour to guide our audience to developing…

… and we will share additional tips, tricks, and in-depth guidance on all of these topics!

Abstract

In a previous episode, we looked at motivating the use of object orientation in Python in analytical work, looking at boilerplate reduction tools such as dataclasses.dataclass and collections.namedtuple.

Let’s turn our attention to the Python object model itself and the various mechanics it supports and the protocols it provides.

In this episode, we’ll look at the Python object model and the various “hook points” it provides to users to integrate with the common vocabulary—the “language”—of the language. We’ll look at these in terms of their overall design as protocols and look at common conventions and rules around the most common protocols users may implement:

To Be Continued…

Did you enjoy this episode? Did you learn something new that will help you as you continue or begin to use object-oriented approaches and the Python data model in your work?

If so, stay tuned for future episodes, which may…

If there are other related topics you’d like to see covered, please reach out to Diego Torres Quintanilla.

Notes

# Warm-up: emit the same banner three times.
for _ in range(3):
    print("Let's go!")

Part I: why objects?

# Method style: the behaviour is defined on the type and reached through
# the instance (`obj.f()`).
class T:
    def f(self): pass

obj = T(...)  # `...` is a placeholder for constructor arguments; T defines no __init__ here
obj.f()
# Function style: the same behaviour written as a free function that is
# handed the object explicitly (`f(obj)`).
def f(obj): pass

obj = ...  # placeholder standing in for any object
f(obj)
from pandas import MultiIndex, DataFrame, date_range

# Two-level row index: a ticker label paired with a daily timestamp.
labels = ['ABC', 'ABC', 'XYZ']
days = date_range('2021-06-04', periods=3)
index = MultiIndex.from_arrays([labels, days])

# Single-column frame holding 0, 1, 2 against that index.
df = DataFrame({'signal': range(3)}, index=index)

# Grouping is spelled as a method on the frame itself.
grouped = df.groupby('signal')
print(grouped)
from numpy import array
xs = array([1, 2, 3])

# Reductions and products are available as methods on the array object.
total = xs.sum()
average = xs.mean()
inner = xs.dot(xs)
print(total, average, inner)
from numpy import zeros, ones, linspace, logspace
from numpy import vstack
from numpy import convolve
from numpy.linalg import det

# Array constructors are free functions; each assignment below replaces
# the previous one, so only the final linspace result is used afterwards.
xs = zeros(3)
xs = ones(3)
xs = logspace(1, 5, 3)
xs = linspace(1, 5, 3)

# Stack len(xs) copies of xs as rows, then hand the matrix to det().
stacked = vstack([xs for _ in range(len(xs))])
print(det(stacked), convolve(xs, ones(1)))
from numpy.random import normal
from scipy.spatial.transform import Rotation
from matplotlib.pyplot import pcolormesh, show

# A rotation of 90 degrees about the x axis, built through an alternate
# constructor on the Rotation type.
r = Rotation.from_euler('x', 90, degrees=True)

# Three random 3-vectors (one per row) to push through the rotation.
points = normal(size=(3, 3))
rotated = r.apply(points)
print(
    #  r.as_matrix().round(2),
    rotated,
    #  pcolormesh(normal(size=(3, 3))),
)
#  show()
class T:
    """Counter written as an object: explicit state plus methods acting on it."""

    def __init__(self, state):
        self.state = state

    def inc(self):
        """Raise the stored value by one."""
        self.state += 1

    def dec(self):
        """Lower the stored value by one."""
        self.state -= 1

    def get(self):
        """Return the stored value."""
        return self.state


x = T(10)
print(f'{x.get() = }')
for _ in range(2):
    x.inc()
print(f'{x.get() = }')
# dir() lists the methods and the `state` attribute by name.
print(dir(x))
def f(state):
    """Closure form of the counter: returns the triple ``(inc, dec, get)``.

    All three functions share the single ``state`` variable of this call.
    """
    def get():
        return state

    def inc():
        nonlocal state
        state += 1

    def dec():
        nonlocal state
        state -= 1

    return inc, dec, get


x = f(10)
print(f'{x[-1]() = }')
for _ in range(2):
    x[0]()
print(f'{x[-1]() = }')
# A plain tuple: dir() shows tuple methods, nothing named inc/dec/get.
print(dir(x))
def f(state):
    """Closure form of the counter: returns the triple ``(inc, dec, get)``.

    The caller is expected to unpack the triple into three names.
    """
    def get():
        return state

    def inc():
        nonlocal state
        state += 1

    def dec():
        nonlocal state
        state -= 1

    return inc, dec, get


# Unpacking gives each operation its own name instead of a tuple position.
inc, dec, get = f(10)
print(f'{get() = }')
for _ in range(2):
    inc()
print(f'{get() = }')
from collections import namedtuple

def f(state):
    """Closure counter whose operations come back as a namedtuple.

    The result has fields ``inc``, ``dec`` and ``get`` so the closures can
    be reached with attribute syntax.
    """
    def get():
        return state

    def inc():
        nonlocal state
        state += 1

    def dec():
        nonlocal state
        state -= 1

    bundle = namedtuple('f', ['inc', 'dec', 'get'])
    return bundle(inc=inc, dec=dec, get=get)


x = f(10)
print(f'{x.get() = }')
for _ in range(2):
    x.inc()
print(f'{x.get() = }')
print(dir(x))
# Closure mechanics, read-only case: the bare reference to `x` inside `g`
# is enough to make `x` a closure variable of `g`.
def f(x):
    def g():
        x
    return g

# The captured value is reachable through the function's closure cells.
print(f'{f(123).__closure__[0].cell_contents = }')
from dis import dis
# Disassemble the inner function to see how the captured name is loaded.
dis(f(...))
# Closure mechanics, read-write case: `nonlocal` lets `g` rebind the
# enclosing function's `x`.
def f(x):
    def g():
        nonlocal x
        x = x + 1
    return g

from dis import dis
# Disassemble again to compare with the read-only version above.
dis(f(...))
class T:
    """Pipeline written as methods that communicate through attributes.

    ``load_data`` and ``clean_data`` each create an attribute on the
    instance; the remaining two steps are empty stubs.
    """

    def load_data(self):
        """Create ``self.data`` (placeholder value)."""
        self.data = Ellipsis

    def clean_data(self):
        """Create ``self.cleaned_data`` (placeholder value)."""
        self.cleaned_data = Ellipsis

    def process_data(self):
        """Stub: does nothing."""

    def query_data(self):
        """Stub: does nothing."""


x = T()
from functools import wraps
# Decorator, read inside-out:
#   * calling the decorated name creates the generator (`inst`) and primes
#     it with next(), which runs the body up to its first `yield`;
#   * that first `yield` produces None, so the `or` falls through to the
#     second operand;
#   * the caller therefore receives a small function that forwards its
#     optional argument to `inst.send(...)`.
# wraps(x) copies the generator function's metadata onto the wrapper.
@lambda x: wraps(x)(lambda *a, **kw: next(inst := x(*a, **kw)) or (lambda val=None: inst.send(val)))
def coro():
    # Each `yield` marks the boundary between two pipeline stages; the
    # stages can only be reached in the order they are written.
    _ = yield # dummy: consumed by the priming next() in the decorator
    # load_data
    _ = yield
    # clean_data
    _ = yield
    # process_data
    _ = yield
    # query_data
    _ = yield

# `x` is the sender function returned by the decorator, not the generator.
x = coro()
# Every call resumes the generator and runs it to the following `yield`.
print(f'{x() = }')
print(f'{x() = }')
print(f'{x() = }')
# Three unrelated free functions living directly in the module namespace.
def f():
    pass
def g():
    pass
def h():
    pass

# The same three names reached through a namespace object instead.
# NOTE(review): `diego_lib` is not defined in this file; presumably an
# illustrative module name — confirm before running.
from diego_lib import functions
functions.f
functions.g
functions.h
# Illustration of nested `with` blocks scoping a change to the code inside
# them; `dv01()` is re-read at each nesting level.
# NOTE(review): `bump_ir` and `bump_rr` are not defined in this file and
# `portfolio` is a placeholder, so this snippet is illustrative only.
portfolio = ...
with bump_ir(.10):
    print(f'{portfolio.dv01() = }')
    with bump_ir(.10):
        print(f'{portfolio.dv01() = }')
        with bump_rr(.10):
            print(f'{portfolio.dv01() = }')
from contextlib import contextmanager
@contextmanager
def ctx():
    """Context manager that prints 'before' on entry and 'after' on exit.

    The ``yield`` sits inside ``try``/``finally`` so that 'after' is
    printed even when the body of the ``with`` block raises. This matches
    the class-based form, whose ``__exit__`` always runs. The exception
    itself is not suppressed.
    """
    print('before')
    try:
        yield
    finally:
        print('after')

with ctx():
    print('inside')
class T:
    """Class-based context manager: prints around the ``with`` body."""

    def __enter__(self):
        """Run on entry; returns None, so ``as`` would bind None."""
        print('before')

    def __exit__(self, *exc_info):
        """Run on exit; the falsy return lets any exception propagate."""
        print('after')
        return None


with T():
    print('inside')
from enum import Enum, auto
class Choices(Enum):
    """Closed set of three options; values are assigned by ``auto()``."""
    a = auto()
    b = auto()
    x = auto()


# The Enum class is itself iterable, yielding members in definition order.
for choice in Choices:
    print(f'{choice = }')
from random import choice as random_choice
members = list(Choices)
print(random_choice(members))
class T:
    """Catalogue of operator hooks.

    Each special method is the hook behind one piece of operator syntax;
    the trailing comment on each ``def`` shows that syntax. Bodies are
    stubs, so every operator expression below evaluates to None.
    """
    def __add__(self, other):       # self + other
        pass
    def __sub__(self, other):       # self - other
        pass
    def __mul__(self, other):       # self * other
        pass
    def __truediv__(self, other):   # self / other
        pass
    def __floordiv__(self, other):  # self // other
        pass
    def __pow__(self, other):       # self ** other
        pass
    def __matmul__(self, other):    # self @ other
        pass
    def __pos__(self):              # +self
        pass
    def __neg__(self):              # -self
        pass
    def __lshift__(self, other):    # self << other
        pass
    def __rshift__(self, other):    # self >> other
        pass
    def __and__(self, other):       # self & other
        pass
    def __or__(self, other):        # self | other
        pass
    def __xor__(self, other):       # self ^ other
        pass
    def __bool__(self):             # bool(self), and therefore `not self`
        # There is no `__not__` hook in the data model: `not obj` negates
        # the truth value obtained through `__bool__`. Returning True
        # keeps the default truthiness of a plain object.
        return True
    def __invert__(self):           # ~self
        pass
    def __mod__(self, other):       # self % other
        pass

T() + T()
T() - T()
T() * T()
T() / T()
T() // T()
T() ** T()
T() @ T()
+T()
-T()
T() << 0
T() >> 0
T() & T()
T() | T()
T() ^ T()
not T()
~T()
T() % 0
from pathlib import Path
# Path overloads `/` (the __truediv__ hook) to mean "join a segment".
base = Path('/tmp')
path = base / 'abc' / 'def'
print(f'{path = }')
# len() on a built-in container.
xs = list(range(1, 4))
print(f'{len(xs) = }')
class T:
    """Smallest type that takes part in the ``len()`` protocol."""

    def __len__(self):
        """Report a length of zero."""
        return 0


x = T()
print(f'{len(x) = }')
from pandas import DataFrame
# Three rows, two identical integer columns named 'a' and 'b'.
df = DataFrame({column: range(3) for column in ('a', 'b')})

# Three different notions of "how big": row count, (rows, columns), cells.
print(f'{len(df)  = }')
print(f'{df.shape = }')
print(f'{df.size  = }')
#  print(df.count(axis=0))
#  print(df.count(axis=1))
#  print(df.nunique(axis=0))
#  print(df.nunique(axis=1))
from pandas import Series
# Values 0, 1, 2 stored under labels 20, 10, 0: position and label disagree.
s = Series(range(3), index=[20, 10, 0])

by_position = s.iloc[0]  # first element by position
by_label = s.loc[0]      # element whose label is 0
print(
    #  s[0], # implicit .loc/label
    #  s[0:], # implicit .iloc/position
    by_position,
    by_label,
)
class T:
    """Smallest type that supports subscript syntax (``obj[key]``)."""

    def __getitem__(self, key):
        """Stub: accepts any key and returns None."""


obj = T()
obj[123]
from collections import Counter
# Letter frequencies: two 'a', two 'b', five 'c'.
c = Counter({'a': 2, 'b': 2, 'c': 5})

# Subscript on a present key, then on an absent key.
print(f'{c["c"] = }')
print(f'{c["d"] = }')
# The `+`, `|` and `&` operators are overloaded for Counter as well.
print(f'{c + c  = }')
print(f'{c | c  = }')
print(f'{c & c  = }')
from collections import defaultdict, ChainMap

# defaultdict: a missing key is created on first access via the factory.
dd = defaultdict(list)
print(f'{dd["abc"] = }')


class passthru(dict):
    """dict whose missing keys look up to themselves (nothing is stored)."""

    def __missing__(self, key):
        return key


overrides = passthru(a='aa')
print(f'{overrides["a"] = }')
print(f'{overrides["b"] = }')

# ChainMap: the first mapping that has the key wins.
primary = {'abc': 123}
fallback = {'abc': 456, 'def': 789}
cm = ChainMap(primary, fallback)
print(f'{cm["abc"] = }')
print(f'{cm["def"] = }')
class T:
    """Object exposing a computed flag through attribute syntax."""

    @property
    def is_open(self):
        """Always True in this example."""
        return True


obj = T()
print(f'{not obj.is_open = }')
class T:
    """Object exposing one property under two attribute names."""

    @property
    def is_open(self):
        """Always True in this example."""
        return True

    # Second name bound to the very same property object.
    structure = is_open


obj = T()
print(f'{obj.is_open   = }')
print(f'{obj.structure = }')
from enum import Enum, auto

class Structure(Enum):
    """The three possible structure states."""
    open   = auto()
    closed = auto()
    clopen = auto()


class T:
    """Object exposing both a boolean flag and a richer enum-valued view."""

    @property
    def is_open(self):
        """Always True in this example."""
        return True

    @property
    def structure(self):
        """Return ``Structure.open`` when the flag is set, otherwise None."""
        return Structure.open if self.is_open else None


obj = T()
print(f'{obj.is_open   = }')
print(f'{obj.structure = }')

Part II: most minimal use of objects

def f(data):
    """Stub step taking the shared ``data`` argument; returns None."""


def g(data):
    """Stub step taking the shared ``data`` argument; returns None."""


def h(data):
    """Stub step taking the shared ``data`` argument; returns None."""
class T:
    """Class used purely as a namespace for three stateless functions."""

    @staticmethod
    def f(data):
        """Stub step; returns None."""

    @staticmethod
    def g(data):
        """Stub step; returns None."""

    @staticmethod
    def h(data):
        """Stub step; returns None."""
from pandas import read_csv
from io import StringIO
# In-memory CSV with three unlabelled columns (date, ticker, price).
data = StringIO('''
2021-06-04,abc,123
2021-06-04,xyz,456
2021-06-05,abc,789
''')

#  df = read_csv(data)
# No header row: column names are supplied explicitly and the first two
# columns (date, ticker) become a two-level row index.
df = read_csv(data, header=None, index_col=[0, 1], names='date ticker price'.split())
print(
    #  df,
    # Per ticker: drop the ticker level, then take a rolling mean over a
    # window of one row.
    # NOTE(review): the lambda returns a result indexed by date only; how
    # transform() realigns that may depend on the pandas version — confirm.
    df.groupby('ticker').transform(
        lambda s: s.reset_index('ticker', drop=True).rolling(1, min_periods=1).mean()
    )
)
from pandas import DataFrame, date_range, IndexSlice
from string import ascii_lowercase
from numpy import repeat, tile

from numpy.random import default_rng
from pandas import Timestamp
# Seed the generator from the session date so the synthetic data repeats.
rng = default_rng(Timestamp('2021-06-04').asm8.astype('uint32'))

# load data
# Draw a 5x4 grid of single letters and reinterpret each row of four
# one-character cells as a single four-character string: five tickers.
tickers = rng.choice([*ascii_lowercase], size=(5, 4)).view('<U4').ravel()
# 'min' is the supported alias for minutes; the legacy 'T' spelling is
# deprecated in recent pandas releases.
dates = date_range('2021-06-04 9:00', periods=100, freq='3min')
df = DataFrame({
    'ticker': tile(tickers, len(dates)),
    'date':   repeat(dates, len(tickers)),
    # Multiplicative random walk per ticker, scaled by a random base price.
    'price':  (rng.normal(loc=1, scale=.02, size=(len(dates), len(tickers))).cumprod(axis=0)
              * rng.random(size=len(tickers)) * 100).ravel(),
    'signal': rng.choice([True, False], size=(len(dates), len(tickers))).ravel(),
}).set_index(['ticker', 'date']).sort_index()

# Number of True signals per ticker, computed once and reused below.
signal_counts = df.groupby('ticker')['signal'].sum()
above_median = signal_counts > signal_counts.median()
# Keep only the tickers whose flag is True.
active_tickers = list(above_median[above_median].index)

print(
    #  df,
    #  df.sample(3),
    #  signal_counts,
    #  signal_counts.median(),
    #  above_median,
    # Row and column indexers are passed separately so the two-level
    # MultiIndex slicer cannot be mistaken for a column selection.
    df.loc[IndexSlice[active_tickers, :], :]
)
from pandas import DataFrame
from numpy.random import random

# Raw data: ten uniform random values in a single column 'x'.
df = DataFrame({'x': random(size=10)})
# Keep only the rows labelled 0..4.
filtered_df = df.filter(items=range(5), axis=0)

# Shift every value above .5 in a copy of the raw frame.
transformed_df = df.copy()
transformed_df[transformed_df > .5] += 2_000

# The filtered variant must start from the filtered frame; copying `df`
# here would silently ignore the filter.
# NOTE(review): this offset (1_000) differs from the one used for
# transformed_df (2_000); left untouched because the intended value
# cannot be determined from this snippet.
filtered_transformed_df = filtered_df.copy()
filtered_transformed_df[filtered_transformed_df > .5] += 1_000
from collections import namedtuple
from pandas import DataFrame
from numpy.random import random

# Group each raw frame with the frame derived from it, so the pair travels
# together instead of as loosely related variable names.
Analysis = namedtuple('Analysis', 'df transformed')

# Raw data: ten uniform random values in a single column 'x'.
df = DataFrame({'x': random(size=10)})
# Keep only the rows labelled 0..4.
filtered_df = df.filter(items=range(5), axis=0)

# Shift every value above .5 in a copy of the raw frame.
transformed_df = df.copy()
transformed_df[transformed_df > .5] += 100

# The filtered variant must start from the filtered frame; copying `df`
# here would pair filtered_df with a transform of the unfiltered data.
filtered_transformed_df = filtered_df.copy()
filtered_transformed_df[filtered_transformed_df > .5] += 100

an = Analysis(df, transformed_df)
filt_an = Analysis(filtered_df, filtered_transformed_df)

print(
    an.transformed,
    #  filt_an.df,
)
from collections import namedtuple
from pandas import DataFrame
from numpy.random import random

class Analysis(namedtuple('Analysis', 'raw transformed')):
    """Pair of a raw frame and the frame derived from it."""

    @classmethod
    def from_raw(cls, raw):
        """Alternate constructor: derive ``transformed`` from ``raw``.

        Works on a copy of ``raw``; every value above .5 is raised by 100.
        """
        shifted = raw.copy()
        above = shifted > .5
        shifted[above] = shifted[above] + 100
        return cls(raw=raw, transformed=shifted)

    def __repr__(self):
        """Fixed short form so printing never dumps both frames."""
        return 'Analysis(raw=..., transformed=...)'
    #  def __len__(self):
    #      return len(self.raw)


df = DataFrame({'x': random(size=10)})
filtered_df = df.filter(items=range(5), axis=0)

an = Analysis.from_raw(df)
filt_an = Analysis.from_raw(filtered_df)
print(
    filt_an.transformed
)
from dataclasses import dataclass
from pandas import DataFrame
from numpy.random import random

@dataclass(frozen=True)
class Analysis:
    """Frozen pair of a raw frame and the frame derived from it."""
    raw : DataFrame
    transformed : DataFrame

    def __post_init__(self):
        """Hook called after the generated __init__; a no-op here."""

    @classmethod
    def from_raw(cls, raw):
        """Alternate constructor: derive ``transformed`` from ``raw``.

        Works on a copy of ``raw``; every value above .5 is raised by 100.
        """
        shifted = raw.copy()
        above = shifted > .5
        shifted[above] = shifted[above] + 100
        return cls(raw=raw, transformed=shifted)


df = DataFrame({'x': random(size=10)})
filtered_df = df.filter(items=range(5), axis=0)

an = Analysis.from_raw(df)
filt_an = Analysis.from_raw(filtered_df)
from dataclasses import dataclass
from pandas import DataFrame, date_range, IndexSlice
from string import ascii_lowercase
from numpy import repeat, tile

from numpy.random import default_rng
from pandas import Timestamp
# Seed the generator from the session date so the synthetic data repeats.
rng = default_rng(Timestamp('2021-06-04').asm8.astype('uint32'))

class SignalMeta(type):
    """Metaclass that makes the class itself act as a descriptor.

    When a class built with this metaclass is stored as a class attribute
    of another class, reading that attribute from an instance constructs a
    fresh object from the instance's ``data``.
    """
    def __get__(self, instance, owner):
        # Class-level access (e.g. ``Analysis.signal``) has no instance to
        # read ``data`` from; hand back the class itself, as descriptors
        # conventionally do.
        if instance is None:
            return self
        return self(instance.data)
@dataclass
class Signal(metaclass=SignalMeta):
    """Per-ticker signal counts derived from a (ticker, date) frame."""
    data : DataFrame
    # Filled in by __post_init__ when omitted; the computed value is a
    # per-ticker Series, not a DataFrame.
    value : DataFrame = None
    def __post_init__(self):
        if self.value is None:
            # Number of True signals per ticker.
            self.value = self.data.groupby('ticker')['signal'].sum()
    def __repr__(self):
        return 'Signal(data=..., value=...)'
    @property
    def high(self):
        """Boolean Series: ticker count strictly above the median count."""
        return self.value > self.value.median()
    @property
    def low(self):
        """Boolean Series: ticker count strictly below the median count."""
        return self.value < self.value.median()

@dataclass
class Analysis:
    """Bundle of a (ticker, date)-indexed frame and the views derived from it."""
    data : DataFrame

    @classmethod
    def from_random(cls):
        """Alternate constructor: build an instance over synthetic data."""
        # Five random four-letter tickers (rows of a 5x4 letter grid
        # reinterpreted as single strings).
        tickers = rng.choice([*ascii_lowercase], size=(5, 4)).view('<U4').ravel()
        # 'min' is the supported alias for minutes; the legacy 'T'
        # spelling is deprecated in recent pandas releases.
        dates = date_range('2021-06-04 9:00', periods=100, freq='3min')
        df = DataFrame({
            'ticker': tile(tickers, len(dates)),
            'date':   repeat(dates, len(tickers)),
            'price':  (rng.normal(loc=1, scale=.02, size=(len(dates), len(tickers))).cumprod(axis=0)
                      * rng.random(size=len(tickers)) * 100).ravel(),
            'signal': rng.choice([True, False], size=(len(dates), len(tickers))).ravel(),
        }).set_index(['ticker', 'date']).sort_index()
        return cls(df)

    # Reading ``self.signal`` goes through SignalMeta.__get__ and yields a
    # Signal built from ``self.data``.
    signal = Signal

    def _rows_for(self, flags):
        """Return the rows of ``data`` whose ticker is flagged True."""
        # Filter to the True entries first: the index of the unfiltered
        # boolean Series would name every ticker.
        chosen = list(flags[flags].index)
        # Row and column indexers are passed separately so the two-level
        # MultiIndex slicer is unambiguous.
        return self.data.loc[IndexSlice[chosen, :], :]

    @property
    def affected(self):
        """Rows belonging to tickers with an above-median signal count."""
        return self._rows_for(self.signal.high)

    @property
    def unaffected(self):
        """Rows belonging to tickers with a below-median signal count."""
        return self._rows_for(self.signal.low)

    def with_affected(self):
        """Return ``data`` with a boolean 'flag' column joined on ticker."""
        flag = self.signal.high.rename('flag')
        return self.data.merge(flag, left_on='ticker', right_index=True)

    def __repr__(self):
        return 'Analysis(data=...)'

an = Analysis.from_random()

print(
    #  an,
    #  an.data,
    #  an.signal,
    #  an.signal.high,
    #  an.affected,
    #  an.unaffected,
    an.with_affected(),
)

Part III: basic conventions

# Convention check: what happens when __len__ reports a negative length?
class T:
    def __len__(self):
        return -1

# Expected to fail: len() rejects a negative result with ValueError.
print(f'{len(T()) = }')
# Convention check: what happens when __len__ reports a non-integer?
class T:
    def __len__(self):
        return 0.5

# Expected to fail: len() requires an integer result and raises TypeError.
print(f'{len(T()) = }')
class T:
    """Probe for which return values ``bool()`` and ``in`` will accept."""

    def __bool__(self):
        #  return ...
        return True

    def __contains__(self, item):
        """Return a non-bool object; ``in`` reduces it to its truth value."""
        return Ellipsis


obj = T()
print(f'{bool(obj) = }')
print(f'{0 in obj = }')
# Convention check: rich comparisons may return any object, not just bool.
class T:
    def __lt__(self, other):
        return ...
    def __gt__(self, other):
        return ...

obj = T()
# `a < b < c` means `(a < b) and (b < c)`.
# Here `... < obj` falls back to obj.__gt__ (reflected), which is truthy,
# so the result of the chain is whatever `obj < ...` returns.
print(f'{... < obj < ... = }')
# Here the second link is `... < ...`, which involves no T at all; it is
# expected to raise TypeError because Ellipsis defines no ordering.
print(f'{obj < ... < ... = }')
class T:
    """Two-field record whose repr reads like the call that builds it."""

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __repr__(self):
        """Render each field with its own repr, constructor-style."""
        return 'T({!r}, {!r})'.format(self.a, self.b)


print(f'{T(123, "abc") = }')
from dataclasses import dataclass

@dataclass
class T:
    """Wrapper around one number that supports ``+`` in all three forms."""
    value : int = 0
    def __add__(self, other):
        """``self + other``: return the plain sum (not a new T)."""
        return self.value + other
    def __radd__(self, other):
        """``other + self``: used when the left operand cannot handle T."""
        return self.value + other
    def __iadd__(self, other):
        """``self += other``: update in place."""
        self.value += other
        # The name on the left of `+=` is rebound to this return value;
        # without it the variable would end up bound to None.
        return self

x, y = T(), 123
print(f'{x + y = }')
# Operands swapped so the reflected hook (__radd__) is exercised as well.
print(f'{y + x = }')
x += 123
print(f'{x     = }')
from numpy.random import randint
# Three random integers drawn from [0, 10), stored with an integer dtype.
xs = randint(low=0, high=10, size=3, dtype=int)

# Toggle one of the lines below to compare building a new array with
# updating the integer array in place.
#  xs = xs + 1.0
#  xs += 1.0
print(xs)
class T:
    """Probe for how ``hash()`` treats a ``__hash__`` that returns -1."""

    def __hash__(self):
        return -1


obj = T()

# Compare the printed value with the -1 returned above.
# NOTE(review): on CPython the built-in is expected to report -2, since
# -1 is reserved internally as an error marker — confirm on the target
# interpreter.
print(f'{hash(obj) = }')
class metaclass(type):
    """Tracing metaclass: prints each hook as class creation reaches it."""

    # Runs when a class built by this metaclass is called, i.e. on
    # instance creation.
    def __call__(self):
        print(f'metaclass.__call__({self!r})')
        return super().__call__()

    # QUESTION: what does `__new__` do for a normal class?
    # The class keywords are printed but not forwarded to type.__new__,
    # which would otherwise pass them on to __init_subclass__.
    def __new__(cls, name, bases, body, **kwds):
        print(f'metaclass.__new__({cls!r}, {name!r}, {bases!r}, {body!r}, {kwds!r})')
        return super().__new__(cls, name, bases, body)

    def __init__(self, name, bases, body, **kwds):
        print(f'metaclass.__init__({self!r}, {name!r}, {bases!r}, {body!r}, {kwds!r})')
        return super().__init__(name, bases, body)

    # Supplies the namespace the class body executes in.
    @classmethod
    def __prepare__(cls, name, bases, **kwds):
        print(f'metaclass.__prepare__({cls!r}, {name!r}, {bases!r}, {kwds!r})')
        # super() inside a classmethod is already bound to `cls`; passing
        # `cls` again would shift every argument by one position.
        return super().__prepare__(name, bases, **kwds)

# No docstrings on A..E: a docstring would show up in the printed `body`.
class A(metaclass=metaclass): pass
class B(A, x=10, y=200, z=3_000): pass
class C(B):
    def __new__(cls):
        print(f'C.__new__({cls!r})')
        return super().__new__(cls)
    def __init__(self):
        print(f'C.__init__({self!r})')
        super().__init__()
    def __call__(self):
        print(f'C.__call__({self!r})')
    def __init_subclass__(cls, **kwds):
        print(f'C.__init_subclass__({cls!r}, {kwds!r})')
class D(C): pass
class E(D, x='ecks', y='why', z='zee'): pass

print(f'{A = }')
print(f'{B = }')
print(f'{C = }')
print(f'{B() = }')
print(f'{C() = }')
x = C()
print(f'{x() = }')

Part IV: complex considerations

# what is the difference between (i)–(iv) below?

from fake_c_lib import get_from_path, save_to_path
# One storage backend exposed through four different call syntaxes; every
# variant forwards to the same two helpers imported from fake_c_lib.
class Database:
    # i. explicit methods
    def get(self, path):        return get_from_path(path)
    def set(self, path, value): return save_to_path(path, value)

    # ii. subscript syntax: db[path] / db[path] = value
    def __getitem__(self, path):        return get_from_path(path)
    def __setitem__(self, path, value): return save_to_path(path, value)

    # iii. attribute syntax: db.name / db.name = value
    #   __getattr__ only runs when normal attribute lookup fails, whereas
    #   __setattr__ intercepts every attribute assignment on the instance.
    def __getattr__(self, path):        return get_from_path(path)
    def __setattr__(self, path, value): return save_to_path(path, value)

    # iv. call syntax: one entry point, read or write chosen by arity
    Missing = object() # QUESTION: why like this, and why does this work?
    #   A unique sentinel lets None remain a value that can be saved. The
    #   default below is evaluated while the class body runs, where
    #   `Missing` is an ordinary local name.
    def __call__(self, path, value=Missing):
        if value is self.Missing:
            return get_from_path(path)
        return save_to_path(path, value)

# the code below saves the object `obj` to path `/a/b/c` and reads it
#   back, once per protocol above
# NOTE(review): `obj` is not assigned in this snippet before its first use.
db = Database()

# i. explicit methods
db.set('/a/b/c', obj)
obj = db.get('/a/b/c')

# ii. subscript syntax
db['/a/b/c'] = obj
obj = db['/a/b/c']

# iii. attribute syntax: the key must be a valid identifier, so the path
#   is spelled `a_b_c` rather than `/a/b/c`
db.a_b_c = obj
obj = db.a_b_c

# iv. call syntax
db('/a/b/c', obj)
obj = db('/a/b/c')
# Design heuristics: what a reader is entitled to assume from each of the
# three syntaxes (call, subscript, attribute).
f(x) # f - function, x - argument
     # - "compute something"/"perform some action"
     # - stateful/stateless: anything goes
     # - fast/slow: anything goes
     # - error: anything goes
     # - modalities

     # k is some data, belongs to some potentially unbounded set of possibilities
d[k] # d - dict/list, k - index/key
     # - "look something up"
     # - stateful/stateless: stateless
     # - fast/slow: fast
     # - error: not-found, IndexError/KeyError
     # - modalities

     # a belongs to some "bounded" set of possibilities
x.a  # x - object, a - attribute
     # - "look something up"
     # - stateful/stateless: stateless
     # - fast/slow: fast
     # - error: probably rare, AttributeError
     # - modalities
# Fixture for the attribute-lookup walkthrough: every attribute below
# lives in a different place (a base class, the class itself, the
# instance, or behind a descriptor).

# Descriptor: defines only __get__, and yields a constant.
class E:
    def __get__(self, instance, owner):
        return 50_000

# Diamond hierarchy: D -> B, C -> A.
class A:
    w = 1
class B(A):
    x = 20
class C(A):
    y = 300
class D(B, C):
    z = 4_000
    def __init__(self, aa, bb):
        # aa and bb are stored on the instance, not on any class.
        self.aa, self.bb = aa, bb
    cc = E()  # descriptor instance stored as a class attribute

    #  def __getattr__(self, attr):
    #      pass
    #  def __getattribute__(self, attr):
    #      pass

obj = D(123, 456)

def _getattr(obj, attr):
    """Re-implement the default lookup that ``getattr(obj, attr)`` performs.

    Order of precedence:

    1. a data descriptor found on the type (walking the MRO),
    2. an entry in the instance ``__dict__``,
    3. a non-data descriptor or plain value found on the type,
    4. the type's ``__getattr__`` fallback, when one is defined.

    Raises AttributeError when none of these produces a value.
    """
    owner = type(obj)
    missing = object()  # private sentinel: None is a legitimate value

    # First class along the MRO that defines the name wins.
    found = missing
    for cls in owner.__mro__:
        if attr in cls.__dict__:
            found = cls.__dict__[attr]
            break

    # Descriptor hooks are looked up on the type of the attribute value.
    kind = type(found)
    has_get = found is not missing and hasattr(kind, '__get__')
    is_data = found is not missing and (
        hasattr(kind, '__set__') or hasattr(kind, '__delete__')
    )

    # 1. Data descriptors take priority over the instance dictionary.
    if has_get and is_data:
        return kind.__get__(found, obj, owner)

    # 2. Instance state (absent for objects without a __dict__).
    instance_state = getattr(obj, '__dict__', {})
    if attr in instance_state:
        return instance_state[attr]

    # 3. Whatever was found on the type, bound through __get__ if possible.
    if has_get:
        return kind.__get__(found, obj, owner)
    if found is not missing:
        return found

    # 4. Last resort: the type's own __getattr__ hook.
    fallback = getattr(owner, '__getattr__', None)
    if fallback is not None:
        return fallback(obj, attr)
    raise AttributeError(
        f'{owner.__name__!r} object has no attribute {attr!r}'
    )

# Side-by-side comparison: the built-in lookup on the left, the
# hand-written one on the right. Both columns use the same format spec
# (right-aligned, width 6, thousands separator) so they line up.
print(f'{getattr(obj, "w")  = :>6,} {_getattr(obj, "w")  = :>6,}')
print(f'{getattr(obj, "x")  = :>6,} {_getattr(obj, "x")  = :>6,}')
print(f'{getattr(obj, "y")  = :>6,} {_getattr(obj, "y")  = :>6,}')
print(f'{getattr(obj, "z")  = :>6,} {_getattr(obj, "z")  = :>6,}')
print(f'{getattr(obj, "aa") = :>6,} {_getattr(obj, "aa") = :>6,}')
print(f'{getattr(obj, "bb") = :>6,} {_getattr(obj, "bb") = :>6,}')
print(f'{getattr(obj, "cc") = :>6,} {_getattr(obj, "cc") = :>6,}')