Abigail ‘Abby’ Parker (Dorothy Malone): I’m doing an advertising layout. So if you’ll excuse me, I’ll get my models and get to work.
Richard ‘Rick’ Todd (Dean Martin): Models?
Abby: Yes, models. You understand. They’re people who pose.
– Artists and Models (1955)
| Date | Time | Track | Meeting Link |
|---|---|---|---|
| June 4, 2021 | 11:00 AM EST | Better Use of Python | Seminar II: “Artists and Models” |
These sessions are designed for a broad audience of modelers and software programmers of all backgrounds and skill-levels.
Our expected audience should comprise attendees with a…
… or greater!
During this session, we will endeavour to guide our audience to developing…
- `__getattr__` vs `__getitem__` vs `__call__`.
- `pandas`

… and we will share additional tips, tricks, and in-depth guidance on all of these topics!
In a previous episode, we looked at motivating the use of object orientation in Python in analytical work, looking at boilerplate reduction tools such as dataclasses.dataclass and collections.namedtuple.
Let’s turn our attention to the Python object model itself and the various mechanics it supports and the protocols it provides.
In this episode, we’ll look at the Python object model and the various “hook points” it provides to users to integrate within the common vocabulary—the “language”—of the language. We’ll look at these in terms of their overall design as protocols and look at common conventions and rules around the most common protocols users may implement:
- `__init__`, `__new__`, and `@classmethod` factories
- `__str__` and `__repr__`; retrieval, access, and computation with `__getattr__`, `__getitem__`, and `__call__`
- `__iter__` and `__next__`.

Did you enjoy this episode? Did you learn something new that will help you as you continue or begin to use object-oriented approaches and the Python data model in your work?
If so, stay tuned for future episodes, which may…
- `getattr(…)` protocol in greater depth, discussing `__getattr__` vs `__getattribute__`, the `__mro__`, and the descriptor protocol (i.e., `__get__`, `__set__`, `__delete__`).
- `__new__` and `__init__` in the context of mechanisms such as `__reduce__`, common patterns (e.g., singleton mechanisms) using `@classmethod` factories or `type.__call__`.
- `type.__new__` and `type.__init__` as well as `__build_class__`, metaclasses in general, `__init_subclass__`, and `__set_name__`.

If there are other related topics you’d like to see covered, please reach out to Diego Torres Quintanilla.
# Warm-up: emit the same greeting three times.
for _ in range(3):
    print("Let's go!")
class T:
    """Minimal class used to show method-call syntax (``obj.f()``)."""

    def f(self):
        """Do nothing; exists only so there is a method to call."""
        pass


# T defines no __init__, so it is instantiated without arguments
# (passing a placeholder argument would raise TypeError).
obj = T()
obj.f()
def f(obj):
    """Free-function counterpart of a method: receives the object explicitly and does nothing."""
    return None


# Ellipsis stands in for "some object".
obj = Ellipsis
f(obj)
from pandas import MultiIndex, DataFrame, date_range
# Two-level index built from parallel arrays: ticker labels and daily dates.
index = MultiIndex.from_arrays(
    [['ABC', 'ABC', 'XYZ'], date_range('2021-06-04', periods=3)],
)
df = DataFrame({'signal': range(3)}, index=index)

# Print the groupby object itself (no aggregation is applied here).
print(df.groupby('signal'))
from numpy import array
# Methods on the array object: sum, mean and the dot product with itself.
xs = array([1, 2, 3])
print(xs.sum(), xs.mean(), xs.dot(xs))
from numpy import zeros, ones, linspace, logspace
from numpy import vstack
from numpy import convolve
from numpy.linalg import det
# Each constructor rebinds xs; only the final linspace result reaches the print.
xs = zeros(3)
xs = ones(3)
xs = logspace(1, 5, 3)
xs = linspace(1, 5, 3)

# Free functions operating on arrays: determinant of xs stacked len(xs)
# times, and xs convolved with a length-1 kernel of ones.
print(det(vstack([xs for _ in xs])), convolve(xs, ones(1)))
from numpy.random import normal
from scipy.spatial.transform import Rotation
from matplotlib.pyplot import pcolormesh, show
# Rotation of 90 degrees about the x axis, built by an alternate constructor.
r = Rotation.from_euler('x', 90, degrees=True)

# Apply the rotation to a random 3x3 sample.
# Alternatives to print instead: r.as_matrix().round(2), or
# pcolormesh(normal(size=(3, 3))) followed by show().
print(r.apply(normal(size=(3, 3))))
class T:
    """Counter whose state is stored on the instance as ``self.state``."""

    def __init__(self, state):
        self.state = state

    def inc(self):
        """Increase the state by one."""
        self.state += 1

    def dec(self):
        """Decrease the state by one."""
        self.state -= 1

    def get(self):
        """Return the current state."""
        return self.state


x = T(10)
print(f'{x.get() = }')
x.inc()
x.inc()
print(f'{x.get() = }')
# dir() lists the instance attribute alongside the methods.
print(dir(x))
def f(state):
    """Closure-based counter.

    Returns the tuple ``(inc, dec, get)``; all three functions share the
    enclosing ``state`` variable.
    """
    def inc():
        nonlocal state
        state += 1

    def dec():
        nonlocal state
        state -= 1

    def get():
        return state

    return inc, dec, get


# Without names, the operations are reached by position in the tuple.
x = f(10)
print(f'{x[-1]() = }')
x[0]()
x[0]()
print(f'{x[-1]() = }')
print(dir(x))
def f(state):
    """Closure-based counter returning ``(inc, dec, get)`` over a shared ``state``."""
    def inc():
        nonlocal state
        state += 1

    def dec():
        nonlocal state
        state -= 1

    def get():
        return state

    return inc, dec, get


# Unpacking gives each operation a readable name.
inc, dec, get = f(10)
print(f'{get() = }')
inc()
inc()
print(f'{get() = }')
from collections import namedtuple
def f(state):
    """Closure-based counter packaged as a namedtuple.

    Returns a namedtuple with fields ``inc``, ``dec`` and ``get`` so the
    closures can be reached with attribute syntax.
    """
    def inc():
        nonlocal state
        state += 1

    def dec():
        nonlocal state
        state -= 1

    def get():
        return state

    return namedtuple('f', 'inc dec get')(inc, dec, get)


x = f(10)
print(f'{x.get() = }')
x.inc()
x.inc()
print(f'{x.get() = }')
print(dir(x))
def f(x):
    """Return a function ``g`` that closes over ``x`` (reads it, returns None)."""
    def g():
        x  # bare reference: enough to make x a closure variable of g
    return g


# The captured value is visible through the function's closure cell.
print(f'{f(123).__closure__[0].cell_contents = }')

from dis import dis
# Disassemble the inner function to see how the closure variable is loaded.
dis(f(...))
def f(x):
    """Return a function ``g`` that increments the enclosing ``x`` on each call."""
    def g():
        nonlocal x  # rebinding requires declaring x as nonlocal
        x = x + 1
    return g


from dis import dis
# Disassemble the inner function to see the closure load/store.
dis(f(...))
class T:
    """Skeleton of a multi-step workflow; each step is a separate method."""

    def load_data(self):
        """Store a placeholder on ``self.data``."""
        self.data = ...

    def clean_data(self):
        """Store a placeholder on ``self.cleaned_data``."""
        self.cleaned_data = ...

    def process_data(self):
        pass

    def query_data(self):
        pass


x = T()
from functools import wraps
def _stepper(genfunc):
    """Turn a generator function into a factory of "advance one step" callables.

    Calling the decorated function creates the generator and primes it with
    ``next``. If that first yielded value is truthy it is returned directly;
    otherwise a callable is returned that sends its optional argument into
    the generator and returns whatever the generator yields next.
    """
    @wraps(genfunc)
    def start(*a, **kw):
        inst = genfunc(*a, **kw)
        primed = next(inst)
        return primed or (lambda val=None: inst.send(val))
    return start


@_stepper
def coro():
    """Workflow whose steps must run in order; each ``yield`` ends one step."""
    _ = yield  # dummy: consumed by the priming next()
    # load_data
    _ = yield
    # clean_data
    _ = yield
    # process_data
    _ = yield
    # query_data
    _ = yield


x = coro()
print(f'{x() = }')
print(f'{x() = }')
print(f'{x() = }')
def f():
    """Placeholder function."""
    pass


def g():
    """Placeholder function."""
    pass


def h():
    """Placeholder function."""
    pass
from diego_lib import functions
# Attribute lookups on the imported `functions` module; the results are
# discarded, so these lines only demonstrate the namespaced access syntax.
functions.f
functions.g
functions.h
# Illustrative pseudo-code: `portfolio` is a placeholder and `bump_ir` /
# `bump_rr` are not defined in the visible source.
# NOTE(review): the indentation of the `with` bodies was lost; it looks like
# the last two `with` statements were meant to nest — confirm before running.
portfolio = ...
with bump_ir(.10):
print(f'{portfolio.dv01() = }')
with bump_ir(.10):
print(f'{portfolio.dv01() = }')
with bump_rr(.10):
print(f'{portfolio.dv01() = }')
from contextlib import contextmanager
@contextmanager
def ctx():
    """Context manager that prints 'before' on entry and 'after' on exit.

    The exit message sits in a ``finally`` clause so it is printed even when
    the ``with`` body raises; the exception then propagates unchanged.
    """
    print('before')
    try:
        yield
    finally:
        print('after')


with ctx():
    print('inside')
class T:
    """Class-based context manager printing 'before' on entry and 'after' on exit."""

    def __enter__(self):
        print('before')

    def __exit__(self, *_):
        # Returns None, so an exception raised in the body is not suppressed.
        print('after')


with T():
    print('inside')
from enum import Enum, auto
class Choices(Enum):
    """Closed set of options; values are assigned automatically."""
    a = auto()
    b = auto()
    x = auto()


# Iterating the Enum class yields its members in definition order.
for choice in Choices:
    print(f'{choice = }')

from random import choice as random_choice
# Unpacking the class into a list gives a sequence to pick from at random.
print(random_choice([*Choices]))
class T:
    """Catalogue of operator hooks; every method is a stub returning None."""

    # Binary arithmetic: +  -  *  /  //  **  @
    def __add__(self, other):
        pass

    def __sub__(self, other):
        pass

    def __mul__(self, other):
        pass

    def __truediv__(self, other):
        pass

    def __floordiv__(self, other):
        pass

    def __pow__(self, other):
        pass

    def __matmul__(self, other):
        pass

    # Unary: +x  -x
    def __pos__(self):
        pass

    def __neg__(self):
        pass

    # Shifts and bitwise: <<  >>  &  |  ^
    def __lshift__(self, other):
        pass

    def __rshift__(self, other):
        pass

    def __and__(self, other):
        pass

    def __or__(self, other):
        pass

    def __xor__(self, other):
        pass

    # NOTE: `__not__` is not a data-model hook. The `not` operator goes
    # through truth-value testing (`__bool__`, then `__len__`), so this
    # method is never invoked by any operator. Kept for the discussion.
    def __not__(self, other):
        pass

    # Unary ~x
    def __invert__(self):
        pass

    # Modulo: %
    def __mod__(self, other):
        pass


# One expression per hook above, in the same order.
T() + T()
T() - T()
T() * T()
T() / T()
T() // T()
T() ** T()
T() @ T()
+T()
-T()
T() << 0
T() >> 0
T() & T()
T() | T()
T() ^ T()
~T()
T() % 0
from pathlib import Path
# Same path as chaining the `/` operator, assembled with joinpath.
path = Path('/tmp').joinpath('abc', 'def')
print(f'{path = }')
# Built-in containers report their size through len().
xs = list(range(1, 4))
print(f'{len(xs) = }')
class T:
    """User-defined class that takes part in ``len()`` via ``__len__``."""

    def __len__(self):
        return 0


x = T()
print(f'{len(x) = }')
from pandas import DataFrame
# Two identical integer columns, three rows.
df = DataFrame({column: range(3) for column in ('a', 'b')})

# Three different notions of "size" for the same frame.
print(f'{len(df) = }')
print(f'{df.shape = }')
print(f'{df.size = }')
# Other candidates to compare:
#   df.count(axis=0), df.count(axis=1)
#   df.nunique(axis=0), df.nunique(axis=1)
from pandas import Series
# Integer labels in descending order, so label 0 and position 0 differ.
s = Series(range(3), index=[20, 10, 0])

# Explicit accessors: .iloc is by position, .loc is by label.
# (Plain s[0] and s[0:] choose between the two implicitly.)
print(s.iloc[0], s.loc[0])
class T:
    """User-defined class that supports ``obj[key]`` via ``__getitem__``."""

    def __getitem__(self, key):
        pass


obj = T()
obj[123]
from collections import Counter
# Letter frequencies: a=2, b=2, c=5.
c = Counter(a=2, b=2, c=5)

# Lookups (a missing key counts as zero) and the multiset operators.
print(f'{c["c"] = }')
print(f'{c["d"] = }')
print(f'{c + c = }')
print(f'{c | c = }')
print(f'{c & c = }')
from collections import defaultdict, ChainMap
# defaultdict: a missing key is created from the factory on first lookup.
dd = defaultdict(list)
print(f'{dd["abc"] = }')


class passthru(dict):
    """dict whose missing keys map to themselves (the key is not stored)."""

    def __missing__(self, key):
        return key


overrides = passthru({'a': 'aa'})
print(f'{overrides["a"] = }')
print(f'{overrides["b"] = }')

# ChainMap: the first mapping that contains the key wins.
cm = ChainMap({'abc': 123}, {'abc': 456, 'def': 789})
print(f'{cm["abc"] = }')
print(f'{cm["def"] = }')
class T:
    """Object exposing a boolean flag as a read-only property."""

    @property
    def is_open(self):
        return True


obj = T()
print(f'{not obj.is_open = }')
class T:
    """Object whose ``structure`` attribute is an alias of the ``is_open`` property."""

    @property
    def is_open(self):
        return True

    # Same property object bound under a second name.
    structure = is_open


obj = T()
print(f'{obj.is_open = }')
print(f'{obj.structure = }')
from enum import Enum, auto
class Structure(Enum):
    """Possible structures; richer than a single boolean flag."""
    open = auto()
    closed = auto()
    clopen = auto()


class T:
    """Object exposing both a boolean flag and an enum-valued structure."""

    @property
    def is_open(self):
        return True

    @property
    def structure(self):
        """Return ``Structure.open`` when ``is_open`` is true, otherwise None."""
        if self.is_open:
            return Structure.open
        # "Not open" does not determine which other member applies, so no
        # member is returned here.
        return None


obj = T()
print(f'{obj.is_open = }')
print(f'{obj.structure = }')
def f(data):
    """Placeholder module-level function taking ``data``."""
    pass


def g(data):
    """Placeholder module-level function taking ``data``."""
    pass


def h(data):
    """Placeholder module-level function taking ``data``."""
    pass
class T:
    """Class used purely as a namespace: every method is a staticmethod."""

    @staticmethod
    def f(data):
        pass

    @staticmethod
    def g(data):
        pass

    @staticmethod
    def h(data):
        pass
from pandas import read_csv
from io import StringIO
# In-memory CSV without a header row; the three columns are named below.
data = StringIO('''
2021-06-04,abc,123
2021-06-04,xyz,456
2021-06-05,abc,789
''')
# df = read_csv(data)
# Name the columns explicitly and index the rows by (date, ticker).
df = read_csv(data, header=None, index_col=[0, 1], names='date ticker price'.split())
print(
# df,
# Per ticker group: drop the ticker level from the index, then take a
# window-1 rolling mean of what remains.
df.groupby('ticker').transform(
lambda s: s.reset_index('ticker', drop=True).rolling(1, min_periods=1).mean()
)
)
from pandas import DataFrame, date_range, IndexSlice
from string import ascii_lowercase
from numpy import repeat, tile
from numpy.random import default_rng
from pandas import Timestamp
# Seed the generator from a fixed date so the sample data is reproducible.
rng = default_rng(Timestamp('2021-06-04').asm8.astype('uint32'))

# load data
# Five random 4-letter tickers: a (5, 4) array of single letters, with each
# row re-viewed as one 4-character string.
tickers = rng.choice([*ascii_lowercase], size=(5, 4)).view('<U4').ravel()
# '3min' replaces the deprecated '3T' alias for a three-minute frequency.
dates = date_range('2021-06-04 9:00', periods=100, freq='3min')
df = DataFrame({
    'ticker': tile(tickers, len(dates)),
    'date': repeat(dates, len(tickers)),
    # Multiplicative random walk per ticker, scaled to a random price level.
    'price': (rng.normal(loc=1, scale=.02, size=(len(dates), len(tickers))).cumprod(axis=0)
              * rng.random(size=len(tickers)) * 100).ravel(),
    'signal': rng.choice([True, False], size=(len(dates), len(tickers))).ravel(),
}).set_index(['ticker', 'date']).sort_index()

# Number of True signals per ticker, computed once and reused.
signal_counts = df.groupby('ticker')['signal'].sum()
is_high = signal_counts > signal_counts.median()

# Other things worth printing: df, df.sample(3), signal_counts,
# signal_counts.median(), is_high.
print(
    # All rows of the tickers whose signal count is above the median.
    df.loc[IndexSlice[is_high[is_high].index, :], :]
)
from pandas import DataFrame
from numpy.random import random
df = DataFrame({'x': random(size=10)})
# Keep only the rows labelled 0..4.
filtered_df = df.filter(items=range(5), axis=0)

# Transform the full data: shift every value above .5.
transformed_df = df.copy()
transformed_df[transformed_df > .5] += 2_000

# Transform the filtered data. The copy must start from filtered_df;
# starting from df would silently ignore the filter.
filtered_transformed_df = filtered_df.copy()
filtered_transformed_df[filtered_transformed_df > .5] += 1_000
from collections import namedtuple
from pandas import DataFrame
from numpy.random import random
# Bundle a raw frame with its transformed counterpart so they travel together.
Analysis = namedtuple('Analysis', 'df transformed')

df = DataFrame({'x': random(size=10)})
# Keep only the rows labelled 0..4.
filtered_df = df.filter(items=range(5), axis=0)

transformed_df = df.copy()
transformed_df[transformed_df > .5] += 100

# The filtered pipeline must start from filtered_df; copying df here would
# pair the filtered frame with a transform of the unfiltered data.
filtered_transformed_df = filtered_df.copy()
filtered_transformed_df[filtered_transformed_df > .5] += 100

an = Analysis(df, transformed_df)
filt_an = Analysis(filtered_df, filtered_transformed_df)
print(
    an.transformed,
    # filt_an.df,
)
from collections import namedtuple
from pandas import DataFrame
from numpy.random import random
class Analysis(namedtuple('Analysis', 'raw transformed')):
    """A raw frame paired with the transformed frame derived from it."""

    @classmethod
    def from_raw(cls, raw):
        """Build an Analysis by deriving ``transformed`` from ``raw``.

        ``raw`` is copied, and every value above .5 in the copy is shifted
        by 100; ``raw`` itself is left untouched.
        """
        transformed = raw.copy()
        transformed[transformed > .5] += 100
        return cls(raw, transformed)

    def __repr__(self):
        # Fixed placeholder text instead of the frames' contents.
        return 'Analysis(raw=..., transformed=...)'

    # def __len__(self):
    #     return len(self.raw)


df = DataFrame({'x': random(size=10)})
# Keep only the rows labelled 0..4.
filtered_df = df.filter(items=range(5), axis=0)
an = Analysis.from_raw(df)
filt_an = Analysis.from_raw(filtered_df)
print(
    filt_an.transformed
)
from dataclasses import dataclass
from pandas import DataFrame
from numpy.random import random
@dataclass(frozen=True)
class Analysis:
    """A raw frame paired with the transformed frame derived from it."""
    raw : DataFrame
    transformed : DataFrame

    def __post_init__(self):
        # Hook that runs after the generated __init__; currently a no-op.
        pass

    @classmethod
    def from_raw(cls, raw):
        """Build an Analysis by deriving ``transformed`` from ``raw``.

        ``raw`` is copied, and every value above .5 in the copy is shifted
        by 100; ``raw`` itself is left untouched.
        """
        transformed = raw.copy()
        transformed[transformed > .5] += 100
        return cls(raw, transformed)


df = DataFrame({'x': random(size=10)})
# Keep only the rows labelled 0..4.
filtered_df = df.filter(items=range(5), axis=0)
an = Analysis.from_raw(df)
filt_an = Analysis.from_raw(filtered_df)
from dataclasses import dataclass
from pandas import DataFrame, date_range, IndexSlice
from string import ascii_lowercase
from numpy import repeat, tile
from numpy.random import default_rng
from pandas import Timestamp
# Seed the generator from a fixed date so the sample data is reproducible.
rng = default_rng(Timestamp('2021-06-04').asm8.astype('uint32'))


class SignalMeta(type):
    """Metaclass that lets a class object itself be used as a descriptor."""

    def __get__(self, instance, owner):
        """Return a Signal built from ``instance.data``.

        When looked up on the owning class (``instance`` is None) there is no
        data to wrap, so the class itself is returned.
        """
        if instance is None:
            return self
        return Signal(instance.data)


@dataclass
class Signal(metaclass=SignalMeta):
    """Per-ticker signal counts derived from a (ticker, date)-indexed frame."""
    data : DataFrame
    # Filled in by __post_init__ when left as None.
    value : DataFrame = None

    def __post_init__(self):
        if self.value is None:
            # Sum of the boolean `signal` column within each ticker.
            self.value = self.data.groupby('ticker')['signal'].sum()

    def __repr__(self):
        return 'Signal(data=..., value=...)'

    @property
    def high(self):
        """Boolean mask per ticker: count strictly above the median."""
        return self.value > self.value.median()

    @property
    def low(self):
        """Boolean mask per ticker: count strictly below the median."""
        return self.value < self.value.median()


@dataclass
class Analysis:
    """Wrapper around a (ticker, date)-indexed frame with derived views."""
    data : DataFrame

    @classmethod
    def from_random(cls):
        """Build an Analysis over 5 random tickers x 100 three-minute bars."""
        tickers = rng.choice([*ascii_lowercase], size=(5, 4)).view('<U4').ravel()
        # '3min' replaces the deprecated '3T' alias.
        dates = date_range('2021-06-04 9:00', periods=100, freq='3min')
        df = DataFrame({
            'ticker': tile(tickers, len(dates)),
            'date': repeat(dates, len(tickers)),
            'price': (rng.normal(loc=1, scale=.02, size=(len(dates), len(tickers))).cumprod(axis=0)
                      * rng.random(size=len(tickers)) * 100).ravel(),
            'signal': rng.choice([True, False], size=(len(dates), len(tickers))).ravel(),
        }).set_index(['ticker', 'date']).sort_index()
        return cls(df)

    # The Signal class acts as a descriptor (SignalMeta.__get__), so
    # `an.signal` builds a Signal from `an.data` on every access. A `signal`
    # property that used to precede this line was shadowed by it and has
    # been removed.
    signal = Signal

    @property
    def affected(self):
        """Rows of the tickers whose signal count is above the median."""
        high = self.signal.high
        # Select by the tickers where the mask is True; the mask's full index
        # would match every ticker.
        return self.data.loc[IndexSlice[high[high].index, :], :]

    @property
    def unaffected(self):
        """Rows of the tickers whose signal count is below the median."""
        low = self.signal.low
        return self.data.loc[IndexSlice[low[low].index, :], :]

    def with_affected(self):
        """Return the data with a boolean ``flag`` column (count above median) merged in by ticker."""
        value = self.signal.value
        flag = value.rename('flag') > value.median()
        return self.data.merge(flag, left_on='ticker', right_index=True)

    def __repr__(self):
        return 'Analysis(data=...)'


an = Analysis.from_random()
print(
    # an,
    # an.data,
    # an.signal,
    # an.signal.high,
    # an.affected,
    # an.unaffected,
    an.with_affected(),
)
class T:
    """Breaks the ``__len__`` contract by returning a negative number."""

    def __len__(self):
        return -1


# len() rejects a negative result with ValueError; report it instead of
# letting the exception abort the script.
try:
    print(f'{len(T()) = }')
except ValueError as exc:
    print(f'len(T()) raised {exc!r}')
class T:
    """Breaks the ``__len__`` contract by returning a non-integer."""

    def __len__(self):
        return 0.5


# len() rejects a float result with TypeError; report it instead of letting
# the exception abort the script.
try:
    print(f'{len(T()) = }')
except TypeError as exc:
    print(f'len(T()) raised {exc!r}')
class T:
    """Probes which return values ``bool()`` and ``in`` will accept."""

    def __bool__(self):
        # return ...
        return True

    def __contains__(self, item):
        # A non-bool return value; `in` converts it to a bool.
        return ...


obj = T()
print(f'{bool(obj) = }')
print(f'{0 in obj = }')
class T:
    """Rich comparisons that return Ellipsis instead of a bool."""

    def __lt__(self, other):
        return ...

    def __gt__(self, other):
        return ...


obj = T()
# Both links of this chain involve obj, so the chain evaluates to Ellipsis.
print(f'{... < obj < ... = }')
# Here the second link compares Ellipsis with Ellipsis, which raises
# TypeError; report it instead of letting it abort the script.
try:
    print(f'{obj < ... < ... = }')
except TypeError as exc:
    print(f'obj < ... < ... raised {exc!r}')
class T:
    """Two-field object whose repr mirrors the constructor call."""

    def __init__(self, a, b):
        self.a, self.b = a, b

    def __repr__(self):
        # !r keeps the quotes on strings.
        return f'T({self.a!r}, {self.b!r})'


print(f'{T(123, "abc") = }')
from dataclasses import dataclass
@dataclass
class T:
    """Number-like wrapper demonstrating ``+``, reflected ``+`` and ``+=``."""
    value : int = 0

    def __add__(self, other):
        """``self + other``: return the plain sum (not a T)."""
        return self.value + other

    def __radd__(self, other):
        """``other + self``: used when ``other`` cannot handle the addition."""
        return self.value + other

    def __iadd__(self, other):
        """``self += other``: update in place.

        Must return ``self``: the statement rebinds the name to whatever this
        method returns, so falling off the end would rebind it to None.
        """
        self.value += other
        return self


x, y = T(), 123
print(f'{x + y = }')
# Operands swapped so that __radd__ is exercised as well.
print(f'{y + x = }')
x += 123
print(f'{x = }')
from numpy.random import randint
# Three random integers drawn from [0, 10).
xs = randint(low=0, high=10, size=3, dtype=int)
# Compare `xs = xs + 1.0` with `xs += 1.0` to contrast rebinding with
# in-place addition on an integer array.
print(xs)
class T:
    """Probes what ``hash()`` does with a ``__hash__`` that returns -1."""

    def __hash__(self):
        return -1


obj = T()
# Compare the printed value with the -1 returned above.
print(f'{hash(obj) = }')
class metaclass(type):
    """Tracing metaclass: prints each hook involved in creating and calling a class."""

    def __call__(self):
        # Runs when a class using this metaclass is instantiated.
        print(f'metaclass.__call__({self!r})')
        return super().__call__()

    # QUESTION: what does `__new__` do for a normal class?
    def __new__(cls, name, bases, body, **kwds):
        print(f'metaclass.__new__({cls!r}, {name!r}, {bases!r}, {body!r}, {kwds!r})')
        # Class keywords are reported above but not forwarded to type.__new__.
        return super().__new__(cls, name, bases, body)

    def __init__(self, name, bases, body, **kwds):
        print(f'metaclass.__init__({self!r}, {name!r}, {bases!r}, {body!r}, {kwds!r})')
        return super().__init__(name, bases, body)

    @classmethod
    def __prepare__(cls, name, bases, **kwds):
        print(f'metaclass.__prepare__({cls!r}, {name!r}, {bases!r}, {kwds!r})')
        # super() is already bound to cls in a classmethod, so cls must not
        # be passed a second time.
        return super().__prepare__(name, bases, **kwds)


class A(metaclass=metaclass): pass
class B(A, x=10, y=200, z=3_000): pass


class C(B):
    """Traces the instance-level hooks alongside the metaclass ones."""

    def __new__(cls):
        print(f'C.__new__({cls!r})')
        return super().__new__(cls)

    def __init__(self):
        print(f'C.__init__({self!r})')
        super().__init__()

    def __call__(self):
        print(f'C.__call__({self!r})')

    def __init_subclass__(cls, **kwds):
        print(f'C.__init_subclass__({cls!r}, {kwds!r})')


class D(C): pass
class E(D, x='ecks', y='why', z='zee'): pass

print(f'{A = }')
print(f'{B = }')
print(f'{C = }')
print(f'{B() = }')
print(f'{C() = }')
x = C()
print(f'{x() = }')
# what is the difference between approaches (i)–(iv) below?
from fake_c_lib import get_from_path, save_to_path
class Database:
    """One store exposed through four different access protocols.

    Every variant delegates to ``get_from_path`` / ``save_to_path``; only
    the calling syntax differs.
    """

    # i. explicit methods
    def get(self, path): return get_from_path(path)
    def set(self, path, value): return save_to_path(path, value)

    # ii. item access: db[path]
    def __getitem__(self, path): return get_from_path(path)
    def __setitem__(self, path, value): return save_to_path(path, value)

    # iii. attribute access: db.name
    # __getattr__ receives the attribute name as `path`; __setattr__
    # intercepts every attribute assignment on the instance.
    def __getattr__(self, path): return get_from_path(path)
    def __setattr__(self, path, value): return save_to_path(path, value)

    # iv. call syntax: db(path) reads, db(path, value) writes
    Missing = object() # QUESTION: why like this, and why does this work?

    def __call__(self, path, value=Missing):
        # The identity check against the sentinel tells "no value passed"
        # apart from any value a caller could supply, including None.
        if value is self.Missing:
            return get_from_path(path)
        return save_to_path(path, value)
# the code below saves the object `obj` to path `/a/b/c` and reads it back,
# using each of the protocols above
db = Database()
# i.
db.set('/a/b/c', obj)
obj = db.get('/a/b/c')
# ii.
db['/a/b/c'] = obj
obj = db['/a/b/c']
# iii.
# note: the attribute name `a_b_c` is what gets passed as the path here
db.a_b_c = obj
obj = db.a_b_c
# iv.
db('/a/b/c', obj)
obj = db('/a/b/c')
# Side-by-side notes on the expectations attached to call syntax, item access
# and attribute access. The expressions are illustrative only: `d` and `k`
# are not defined in the visible source.
f(x) # f - function, x - argument
# - "compute something"/"perform some action"
# - stateful/stateless: anything goes
# - fast/slow: anything goes
# - error: anything goes
# - modalities
# k is some data, belongs to some potentially unbounded set of possibilities
d[k] # d - dict/list, k - index/key
# - "look something up"
# - stateful/stateless: stateless
# - fast/slow: fast
# - error: not-found, IndexError/KeyError
# - modalities
# a belongs to some "bounded" set of possibilities
x.a # x - object, a - attribute
# - "look something up"
# - stateful/stateless: stateless
# - fast/slow: fast
# - error: probably rare, AttributeError
# - modalities
class E:
    """Descriptor defining only ``__get__``; every lookup yields 50_000."""

    def __get__(self, instance, owner):
        return 50_000


class A:
    w = 1


class B(A):
    x = 20


class C(A):
    y = 300


class D(B, C):
    """Diamond-shaped hierarchy with instance, class and descriptor attributes."""
    z = 4_000

    def __init__(self, aa, bb):
        self.aa, self.bb = aa, bb

    cc = E()

    # def __getattr__(self, attr):
    #     pass
    # def __getattribute__(self, attr):
    #     pass


obj = D(123, 456)


def _getattr(obj, attr):
    """Simplified re-implementation of ``getattr(obj, attr)``.

    Looks in the instance ``__dict__`` first, then in each class along the
    MRO; a class attribute whose type defines ``__get__`` is invoked as a
    descriptor. Raises AttributeError when nothing is found.

    Simplification: the built-in lookup gives descriptors that also define
    ``__set__``/``__delete__`` priority over the instance ``__dict__``, and
    falls back to ``__getattr__``; neither is modelled here.
    """
    if attr in obj.__dict__:
        return obj.__dict__[attr]
    for cls in type(obj).__mro__:
        if attr in cls.__dict__:
            rv = cls.__dict__[attr]
            # Descriptor methods are looked up on the type of the attribute.
            if hasattr(type(rv), '__get__'):
                return rv.__get__(obj, type(obj))
            return rv
    raise AttributeError(f'{type(obj).__name__!r} object has no attribute {attr!r}')


# Built-in lookup next to the hand-written one; the columns should agree.
print(f'{getattr(obj, "w") = :>6,} {_getattr(obj, "w") = :>6,}')
print(f'{getattr(obj, "x") = :>6,} {_getattr(obj, "x") = :>6,}')
print(f'{getattr(obj, "y") = :>6,} {_getattr(obj, "y") = :>6,}')
print(f'{getattr(obj, "z") = :>6,} {_getattr(obj, "z") = :>6,}')
print(f'{getattr(obj, "aa") = :>6,} {_getattr(obj, "aa") = :>6,}')
print(f'{getattr(obj, "bb") = :>6,} {_getattr(obj, "bb") = :>6,}')
print(f'{getattr(obj, "cc") = :>6,} {_getattr(obj, "cc") = :>6,}')