ts-python

Seminar IX: “How to Train Your Dragon” (type-checking with PEP-484 & mypy)

Hiccup (Jay Baruchel): Everything we know about you guys is wrong.

Date Time Track Meeting Link
Fri, Nov 12, 2021 9:30 AM EDT Python fundamentals https://primetime.bluejeans.com/a2m/live-event/dwqcpqgq

Contents

Audience

These sessions are designed for a broad audience of modelers and software programmers of all backgrounds and skill levels.

Our expected audience should comprise attendees…

During this session, we will endeavour to guide our audience to developing…

…and we will share additional tips, tricks, and in-depth guidance on all of these topics!

Abstract

In this episode, we will talk about new techniques and tools for ensuring the correctness of your code. You may even have seen these tools, mypy in particular, adopted more and more in the code you interact with, and may be curious how to interpret the new syntax they introduce and how to let your code benefit from these approaches.

We will start by broadly discussing the challenges of static verification in Python (in relation to other languages and ecosystems you may be familiar with, such as C, C++, or Java). We cover new syntax added to Python 3 for annotating functions and variables, and how these fit into third-party tooling like mypy. We’ll discuss where mypy can give good guidance for code improvements and can help reïnforce good design principles in your code. Finally, we’ll discuss areas where mypy may not provide significant benefits and where other checking and verification techniques may be superior.

To Be Continued…

Did you enjoy this episode? Did you learn something new that will help you as you continue or begin to use type hints and mypy in your work?

If so, stay tuned for future episodes, which may…

If there are other related topics you’d like to see covered, please reach out to Diego Torres Quintanilla.

Notes

# Opening line of the notes: prints a start-up banner.
print("Let's get started!")

Annotations & Motivation

Start with our theory of static verifiability, applied to structures…

# Variant 1: `bar` is defined on the parent and used by the child, so the
# lookup `self.bar` in `Derived.foo` resolves through inheritance.
class Base:
    def bar(self):
        pass

class Derived(Base):
    def foo(self):
        # `bar` is not defined on Derived itself; it is found on Base.
        return self.bar()

d = Derived()
# some time later…
d.foo()
# Variant 2: same classes, plus an explicit check between the two class
# definitions that the parent really provides `bar`.
class Base:
    def bar(self):
        pass

# Raises AssertionError right here (before Derived is even defined) if
# Base has no `bar` attribute.
assert hasattr(Base, 'bar')
class Derived(Base):
    def foo(self):
        return self.bar()

d = Derived()
# some time later…
d.foo()
# Variant 3: the dependency is reversed. `Base.foo` calls `self.bar`, but
# only the subclass defines `bar`.
class Base:
    def foo(self):
        # Works only when `self` is an instance of a subclass that supplies
        # `bar`; Base itself defines no such attribute.
        return self.bar()

class Derived(Base):
    def bar(self):
        pass

d = Derived()
# some time later…
d.foo()
from abc import ABC, abstractmethod
class Base(ABC):
    '''
        Abstract parent whose `foo` relies on a `bar` supplied by subclasses.

        Because `bar` is marked abstract, instantiating `Base` (or any
        subclass that does not override `bar`) raises TypeError.
    '''

    def foo(self):
        '''Return whatever the subclass-provided `bar` returns.'''
        return self.bar()

    # Declared as a regular method so that the abstract hook has the same
    # signature (`self`) and name as the overrides it stands in for.
    @abstractmethod
    def bar(self):
        '''Hook that every concrete subclass must override.'''

class Derived(Base):
    '''Concrete subclass: provides the required `bar`.'''

    def bar(self):
        pass

d = Derived()
# some time later…
d.foo()
# Variant 5: the parent checks each subclass as it is created.
class Base:
    def foo(self):
        return self.bar()
    # Python calls this hook whenever a subclass of Base is defined; the
    # assert fails at class-definition time if that subclass has no `bar`.
    # Note that assert statements are skipped when Python runs with -O.
    def __init_subclass__(cls):
        assert hasattr(cls, 'bar') 

class Derived(Base):
    def bar(self):
        pass

d = Derived()
# some time later…
d.foo()

Now let’s apply it to computation…

# `y` follows the bare `*`, so it can only be passed by keyword.
def f(x, *, y=0):
    return x + y

f(123)
# Alternative call sites to toggle on: positional `y`, keyword `y`, and a
# keyword call reached indirectly through a dict lookup.
#  f(123, 456)
#  f(123, y=456)
#  {0: f}[0](123, y=456)
# The same kind of computation with annotations on both parameters and on
# the return value.
def f(x: int, y: int) -> int:
    return x + y

# For comparison, the corresponding declaration in a C-family language:
# int f(int x, int y) {
#     return x + y;
# }

# The second call passes strings. The interpreter does not enforce the
# annotations, so `+` simply concatenates.
print(f'{f(123, 456)     = }')
print(f'{f("abc", "def") = }')

Annotations can be thought of as a form of structured, programmatic documentation…

from pandas import Series

# Undocumented baseline: picks two entries out of a fixed Series by
# position, adds them, and floor-divides the sum by `z`.
def f(x, y, z):
    lookup = Series([0, 10, 200, 3_000, 40_000])
    # Both positional lookups happen before either result is used.
    first = lookup.iloc[x]
    second = lookup.iloc[y]
    total = first + second
    return total // z

print(f'{f(3, 2, 1) = }')
#  print(f'{f(3.0, 2, 1) = }')
#  print(f'{f(6, 2, 1) = }')
#  print(f'{f(3, 2, 0) = }')

# Same function, with its input/output constraints written as `#` comments
# inside the body.
def f(x, y, z):
    # x, y, and z must be integers
    # z cannot be 0
    # the result will be an integer
    values = Series([0, 10, 200, 3_000, 40_000])
    x, y = values.iloc[x], values.iloc[y]
    return (x + y) // z

# Same function, with the constraints moved into a docstring. The text is
# stored on `f.__doc__`, which is what `help(f)` displays.
def f(x, y, z):
    '''
        x, y, and z must be integers
        z cannot be 0
        the result will be an integer
    '''
    values = Series([0, 10, 200, 3_000, 40_000])
    x, y = values.iloc[x], values.iloc[y]
    return (x + y) // z

#  help(f)

# Same function, with a structured docstring: a summary line followed by
# separate Parameters and Returns sections.
def f(x, y, z):
    '''
        Performs some computation.

        Parameters
        ----------
        x : int
            A positional index into some structure.
        y : int
            A positional index into some structure.
        z : int
            Some denominator.

        Returns
        -------
        int
            Some computational result.
    '''
    values = Series([0, 10, 200, 3_000, 40_000])
    x, y = values.iloc[x], values.iloc[y]
    return (x + y) // z

# Uncomment to see the raw docstring text.
#  print(f'{f.__doc__ = }')

# Same function, with the per-parameter documentation moved out of the
# docstring and into the annotations. Each annotation is an arbitrary
# (type, description) tuple; Python stores it in `f.__annotations__`
# without interpreting it.
def f(
    x : (int, 'A positional index into some structure.'),
    y : (int, 'A positional index into some structure.'),
    z : (int, 'Some denominator.'),
) -> (int, 'Some computational result.'):
    '''Performs some computation.'''
    values = Series([0, 10, 200, 3_000, 40_000])
    x, y = values.iloc[x], values.iloc[y]
    return (x + y) // z

# Uncomment to inspect the stored annotation tuples.
#  print(f'{f.__annotations__ = }')
from functools import wraps
from inspect import signature
from collections.abc import Callable

def smart_defaults(f):
    '''
        Decorator: compute omitted arguments from callable annotations.

        Every parameter of `f` whose annotation is callable is treated as
        having a "smart default": when the caller leaves that parameter
        out, the annotation is called with the arguments that *were*
        supplied (as keywords) and its result is passed as that parameter.

        Arguments the caller passes explicitly, positionally or by
        keyword, are never replaced. Annotations that do not belong to a
        parameter (i.e. the `return` annotation) are ignored.

        Raises TypeError (from `Signature.bind`) when the call does not
        match the signature of `f`.
    '''
    sig, annots = signature(f), f.__annotations__
    @wraps(f)
    def inner(*args, **kwargs):
        # Only what the caller actually passed; declared defaults are not
        # included because `apply_defaults()` is not called.
        params = sig.bind(*args, **kwargs).arguments
        defaults = {
            k: v(**params)
            for k, v in annots.items()
            # Skip 'return', and skip anything already supplied so that an
            # explicit argument always wins over the computed value.
            if k in sig.parameters
            and k not in params
            and isinstance(v, Callable)
        }
        return f(*args, **{**kwargs, **defaults})
    return inner

# The annotation on `y` is a callable that receives the supplied arguments
# as keywords; `smart_defaults` calls it to produce a value for `y`. The
# `...` default is only a placeholder so that `y` may be omitted.
@smart_defaults
def f(x, y : lambda **params: params['x'] * 2 = ...):
    print(f'f({x = }, {y = })')

f(x=1)
from dataclasses import dataclass

# `@dataclass` reads the class-level annotations to generate `__init__`
# and `__repr__`. Fields are taken in declaration order; `c` has a default.
@dataclass
class T:
    a : int
    b : int
    c : int = 789

obj = T(123, 456)
print(f'{obj = }')

# Alternative: pass the Ellipsis object for the int-annotated field `b`.
#  obj = T(123, ...)
#  print(f'{obj = }')
from functools import wraps
from inspect import signature
from collections.abc import Callable

def smart_defaults(**spec):
    '''
        Decorator factory: compute omitted arguments from `spec`.

        `spec` maps parameter names to callables. When the decorated
        function is called without one of those parameters, the matching
        callable is invoked with the arguments that *were* supplied (as
        keywords) and its result is passed as that parameter. Entries in
        `spec` that are not callable are ignored.

        Arguments the caller passes explicitly, positionally or by
        keyword, are never replaced.

        Raises TypeError (from `Signature.bind`) when the call does not
        match the decorated function's signature.
    '''
    def dec(f):
        sig = signature(f)
        @wraps(f)
        def inner(*args, **kwargs):
            # Only what the caller actually passed; declared defaults are
            # not included because `apply_defaults()` is not called.
            params = sig.bind(*args, **kwargs).arguments
            defaults = {
                k: v(**params)
                for k, v in spec.items()
                # Skip anything already supplied so that an explicit
                # argument always wins over the computed value.
                if k not in params and isinstance(v, Callable)
            }
            return f(*args, **{**kwargs, **defaults})
        return inner
    return dec

# Same idea as the annotation-based version, but the rule for computing `y`
# is passed to the decorator by keyword instead of being stored in the
# annotation. The `...` default is only a placeholder.
@smart_defaults(
    y=lambda **params: params['x'] * 2
)
def f(x, y=...):
    print(f'f({x = }, {y = })')

f(x=1)
from hypothesis.strategies import integers
from hypothesis import given

# Property-based test: `given` supplies `x` and `y` from the two integer
# strategies, each bounded to 0..10 by `min_value` / `max_value`.
@given(
    x=integers(min_value=0, max_value=10),
    y=integers(min_value=0, max_value=10),
)
def test(x, y):
    assert x + y >= 0

Mechanics & Strategies

from typing import Callable, Dict
# Annotated function used to probe call sites of increasing indirection.
def f(x : int, y : int): pass

f(123, 456)

# Direct call with a non-int first argument (`...` is the Ellipsis object).
#  f(..., 456)

# The same call, reached through a dict lookup.
#  {0: f}[0](..., 456)

# A dict whose values have different signatures.
#  {0: f, 1: lambda: None}[0](..., 456)

# The same dict with an explicitly declared value type.
# NOTE(review): `lambda: None` takes no arguments, so it does not match
# Callable[[int, int], None] — presumably part of the demonstration.
#  d : Dict[int, Callable[[int, int], None]]
#  d = {0: f, 1: lambda: None}
#  d[0](..., 456)

You’re working collaboratively with the narrowing mechanism to add coverage…

# `x` is unannotated; each `isinstance` test establishes its type for the
# branch that follows.
def f(x) -> int:
    if isinstance(x, int):
        return x + 1
    if isinstance(x, str):
        return len(x)
    # No branch for any other type: execution falls off the end and the
    # function returns None, despite the `-> int` annotation.

f(123)
f('...')

If you interact with code that isn’t type-hinted…

# `f` carries no annotations and simply forwards its argument to the
# annotated `g`.
def f(x):
    return g(x)

def g(x : int): pass

f(123)
# Passes the Ellipsis object through the unannotated `f` into `g`.
f(...)
# `f` promises an int but returns whatever `g` returns. `f` is not called
# in this snippet, so `g` being undefined here does not raise.
def f() -> int:
    return g()

# Two alternative unannotated definitions of `g` to toggle on: one returns
# an int, the other returns the Ellipsis object.
#  def g():
#      return 0

#  def g():
#      return ...
# Unannotated legacy function.
def old_code():
    return 1

# Annotated function that uses the unannotated result directly inside an
# expression.
def new_code(x : int) -> int:
    return x + old_code()

new_code(123)
# Alternative call with a float argument.
#  new_code(123.)
# Unannotated legacy function.
def old_code():
    return 1

# Same as the previous variant, except that the unannotated result is
# first bound to a local variable with a declared type.
def new_code(x : int) -> int:
    y : int = old_code()
    return x + y

new_code(123)
# Alternative call with a float argument.
#  new_code(123.)

Additional Mechanisms & Motivation

# `mode` is compared against two string constants; any other value matches
# neither branch and the function returns None without doing anything.
def f(data, *, mode):
    if mode == 'readonly':
        ...
    elif mode == 'readwrite':
        ...

# NOTE(review): `mode` is keyword-only, so this positional call raises
# TypeError at runtime — confirm that this is the intended demonstration.
f(..., 'read-write')
# Keyword form with a misspelled mode string.
#  f(..., mode='read-write')
from enum import Enum

# Functional Enum API: creates `Mode.readonly` and `Mode.readwrite`.
Mode = Enum('Mode', 'readonly readwrite')

# Same dispatch as the string version, comparing enum members by identity.
def f(data, *, mode : Mode):
    if mode is Mode.readonly:
        ...
    elif mode is Mode.readwrite:
        ...

# Alternative calls: a plain string, a member name that does not exist on
# `Mode`, and a valid member.
#  f(..., mode='read-write')
#  f(..., mode=Mode.read_write)
#  f(..., mode=Mode.readonly)
from typing import Literal

# The accepted mode strings are spelled out in the annotation as a Literal.
def f(data, *, mode : Literal['readonly', 'readwrite']):
    if mode == 'readonly':
        ...
    elif mode == 'readwrite':
        ...

f(..., mode='readonly')
# 'read_write' is not one of the two Literal values; at runtime it matches
# neither branch.
f(..., mode='read_write')

Sadly, mypy simply cannot encode certain dynamic constructs…

from typing import Literal
from enum import Enum

Mode = Enum('Mode', 'readonly readwrite')
# The Literal is subscripted with a tuple built at runtime by unpacking the
# enum, rather than with values written out in the source.
# NOTE(review): the annotation lists Mode *members*, while `Mode[mode]`
# below looks a member up by *name* — presumably part of the illustration.
def f(data, *, mode : Literal[(*Mode,)]):
    mode_ = Mode[mode]
    if mode_ is Mode.readonly:
        ...
    elif mode_ is Mode.readwrite:
        ...
# Optional argument, no annotation: the file is opened only when a
# filename is given.
def f(data, *, filename=None):
    if filename is not None:
        # Inside the body, this local `f` shadows the function's own name.
        with open(filename) as f:
            pass

f(...)
f(..., filename=None)
# Attempts to open a file literally named '...'.
f(..., filename='...')
# Same function, with `filename` annotated as `None` only; the annotation
# does not mention `str`.
def f(data, *, filename : None = None):
    if filename is not None:
        # Inside the body, this local `f` shadows the function's own name.
        with open(filename) as f:
            pass

f(...)
f(..., filename=None)
# Passes a str, which the annotation above does not include.
f(..., filename='...')
from typing import Union

# Same function, with the annotation widened to "None or str".
def f(data, *, filename : Union[None, str] = None):
    if filename is not None:
        # Inside the body, this local `f` shadows the function's own name.
        with open(filename) as f:
            pass

f(...)
f(..., filename=None)
f(..., filename='...')
# Only the return value is annotated; `data` can be anything.
def f(data) -> int:
    return sum(data)

f([1, 2, 3])
# Iterating a str yields characters, so `sum` tries `0 + '1'` and raises
# TypeError at runtime.
f('1, 2, 3')
# Summing floats returns a float, despite the `-> int` annotation.
f([1., 2., 3.])
from typing import List

# Same function, with the parameter declared as a list of ints.
def f(data : List[int]) -> int:
    return sum(data)

f([1, 2, 3])
# Alternative calls that do not match the `List[int]` annotation.
#  f('1, 2, 3')
#  f([1., 2., 3.])

These things can get very ugly in practice…

from decimal import Decimal
from numbers import Number
from typing import Union, List

# The result is forced to int, so the body would work for other numeric
# element types too; the question is how to spell that in the annotation.
def f(data : List[int]) -> int:
    return int(sum(data))

# Alternative signatures: the abstract `Number`, or an explicit Union.
#  def f(data : list[Number]) -> int:
#      return int(sum(data))

#  def f(data : list[Union[Decimal, float]]) -> int:
#      return int(sum(data))

f([1, 2, 3])
# Alternative calls with float and Decimal elements.
#  f([1., 2., 3.])
#  f([Decimal('1.'), Decimal('2.'), Decimal('3.')])

What about structures?

# Bare `tuple` annotation: says nothing about length or element types.
def f(t : tuple):
    pass

# Both calls pass three-element tuples, with the element order reversed.
f(('a', 1, None))
f((None, 1, 'a'))
from typing import Tuple

# `Tuple[...]` with one type per position: exactly (str, int, None).
def f(t : Tuple[str, int, None]):
    pass

f(('a', 1, None))
# Same elements in reversed order, so the positions no longer line up with
# the annotation.
#  f((None, 1, 'a'))
from typing import Union, List

# A homogeneous list whose element type is a Union; the literal contains an
# int, a float, and a complex.
xs : List[Union[float, complex]] = [1, 2., 3+4j]
for x in xs:
    # The result is discarded; only the expression itself matters here.
    x + 1
# Unannotated higher-order function: calls `g` with no arguments.
def f(g):
    return g()

f(lambda: None)
# This lambda requires one argument, so the `g()` call inside `f` raises
# TypeError at runtime.
f(lambda x: None)
from typing import Callable

# `g` is declared as a callable taking one int and returning None.
def f(g : Callable[[int], None]):
    return g(0)

# Alternatives that differ from the declared shape in arity or in the
# value returned.
#  f(lambda: None)
f(lambda x: None)
#  f(lambda x: 123)
#  f(lambda x, y: 123)
from typing import Callable, Protocol

# `Callable[[...], ...]` spells out a fixed list of positional parameters.
f : Callable[[int], None] = lambda x: None

# A Protocol with `__call__` describes a callable by its full signature;
# here, one accepting any number of positional arguments.
class Func(Protocol):
    def __call__(self, *args): pass

g : Func = lambda *args: None

Note that this reïnforces our conceptualisation of tuple vs list!

from numpy import dot, array
from numpy.typing import ArrayLike

# Both parameters are annotated as `list`, but the calls below also pass
# numpy arrays.
def f(xs : list, ys : list) -> float:
    return dot(xs, ys)

# Alternative signature using numpy's own `ArrayLike` alias.
#  def f(xs : ArrayLike, ys : ArrayLike) -> float:
#      return dot(xs, ys)

# list/list, list/array, array/array.
f(      [1, 2, 3],        [4, 5, 6])
f(      [1, 2, 3],  array([4, 5, 6]))
f(array([1, 2, 3]), array([4, 5, 6]))
from numpy.typing import ArrayLike
from numpy.random import default_rng
from pandas import date_range, DataFrame, Series
# Generator seeded with 0, shared by the examples that follow.
rng = default_rng(0)

# `ArrayLike` says nothing about shapes; whether `x @ y` works depends on
# the dimensions of the values actually passed.
def f(x : ArrayLike, y : ArrayLike):
    return x @ y

# Shapes (2, 3) and (3, 4).
f(
    x=rng.integers(10, size=(2, 3)),
    y=rng.integers(10, size=(3, 4)),
)

# Multiplies the 'col' column of `df` by `s`; the function depends on that
# column name existing.
def g(df, s):
    return df['col'] * s

# The frame has 5 rows and the series has 4 entries; both are indexed by
# dates starting at 2000-01-01. `size :=` rebinds `size` inside each
# constructor call so that the index length matches the data length.
g(
    df=DataFrame({
        'col': rng.integers(10, size=(size := 5))
    }, index=date_range('2000-01-01', periods=size)),
    s=Series(
        data=rng.integers(10, size=(size := 4)),
        index=date_range('2000-01-01', periods=size)
    ),
)
from typing import TypeVar, Generic, Literal

# Type variables standing for matrix dimensions.
M = TypeVar('M', bound=int)
N = TypeVar('N', bound=int)
P = TypeVar('P', bound=int)

class Matrix(Generic[M, N]):
    '''
        Sketch of a matrix type parameterised by its two dimensions.

        The signature of `__matmul__` states the shape rule for a matrix
        product: (M x N) @ (N x P) -> (M x P). The method body is a stub
        and returns None.
    '''
    # The annotations are written as strings because the name `Matrix` is
    # not bound yet while the class body is being executed; unquoted, the
    # `def` raises NameError.
    def __matmul__(self, other : 'Matrix[N, P]') -> 'Matrix[M, P]': pass

# Declarations only: a bare annotation without an assignment does not bind
# the name at runtime, so this snippet is meant for a type checker rather
# than for execution.
xs : Matrix[Literal[2], Literal[3]]
# Same shape as `xs`, so the inner dimensions of `xs @ ys` (3 and 2) do not
# agree with the rule stated by `Matrix.__matmul__`.
ys : Matrix[Literal[2], Literal[3]]
# Alternative with agreeing inner dimensions.
#  ys : Matrix[Literal[3], Literal[4]]

xs @ ys
from typing import TypeVar, Dict

# Type variables for a mapping's key type and value type.
K = TypeVar('K')
V = TypeVar('V')

def __getitem__(self : Dict[K, V], k : K) -> V:
    '''
        Free-function sketch of how dict lookup is typed.

        Given a `Dict[K, V]` and a key of type `K`, return the value of
        type `V` stored under that key. Raises KeyError if `k` is absent.
    '''
    # Index the mapping that was passed in, not an unrelated global.
    return self[k]
from typing import Dict

#  d : Dict[str, str]
d = {}

for k in d: ...
for v in d.values(): ...
for k, v in d.items(): ...

def f(v : str): pass
f(d[k])
from mypy_extensions import TypedDict
from datetime import datetime

# A dict type with a fixed set of string keys, each with its own value type.
Metadata = TypedDict('Metadata', {'author': str, 'published': datetime})

# Supplies both declared keys.
md : Metadata = {
    'author': '...',
    'published': datetime.now(),
}
from mypy_extensions import TypedDict
from datetime import datetime

# Same TypedDict with a third key, declared with `total=False`.
Metadata = TypedDict('Metadata', {
    'author': str,
    'published': datetime,
    'editor': str,
}, total=False)

# Omits the 'editor' key.
md : Metadata = {
    'author': '...',
    'published': datetime.now(),
}
from collections.abc import Iterable

# Accepts any iterable of ints rather than one concrete container type.
def f(xs : Iterable[int]) -> int:
    return sum(xs)

# A list and a set are both passed.
f([1, 2, 3])
f({1, 2, 3})
from typing_extensions import Protocol

# Structural interface: describes objects that have `method(x: int) -> int`.
class HasMethod(Protocol):
    def method(self, x : int) -> int: pass

def f(obj : HasMethod): pass

# `T` does not inherit from HasMethod. As written it has no `method` at
# all; the commented lines are alternative definitions to toggle on, one
# with a different signature and one with the same signature.
class T:
    pass
    #  def method(self): pass
    #  def method(self, x : int) -> int: pass
obj = T()
f(obj)

Encouraging Good Coding Habits

def fizzbuzz(n):
    '''
        Return the fizzbuzz sequence for 0 .. n-1 as a list.

        Multiples of both 3 and 5 become 'fizzbuzz', other multiples of 5
        become 'buzz', other multiples of 3 become 'fizz', and every other
        number is kept as the int itself.
    '''
    def label(i):
        by_three, by_five = i % 3 == 0, i % 5 == 0
        if by_three and by_five:
            return 'fizzbuzz'
        if by_five:
            return 'buzz'
        if by_three:
            return 'fizz'
        return i

    return [label(i) for i in range(n)]

print(f'{fizzbuzz(20) = }')
from typing import Union, List

def fizzbuzz(n : int) -> List[Union[str, int]]:
    '''
        Return the fizzbuzz sequence for 0 .. n-1.

        Each element is either a word (str) or the number itself (int),
        so callers have to distinguish the two cases per element.
    '''
    rv = []
    for x in range(n):
        # First matching divisor wins; 15 is listed first so that it takes
        # precedence over 5 and 3.
        for divisor, word in ((15, 'fizzbuzz'), (5, 'buzz'), (3, 'fizz')):
            if x % divisor == 0:
                rv.append(word)
                break
        else:
            rv.append(x)
    return rv

print(f'{fizzbuzz(20) = }')
for x in fizzbuzz(20):
    pass
    #  print(f'{x + 1     = }')
    #  print(f'{x.upper() = }')
    #  if isinstance(x, int):
    #      print(f'{x + 1     = }')
    #  elif isinstance(x, str):
    #      print(f'{x.upper() = }')
from typing import List, Tuple

def fizzbuzz(n) -> List[Tuple[str, int]]:
    '''
        Return (word, number) pairs for 0 .. n-1.

        The word is 'fizzbuzz', 'buzz', 'fizz', or the empty string when
        the number is divisible by neither 3 nor 5. Every element has the
        same (str, int) shape.
    '''
    pairs : List[Tuple[str, int]] = [
        (
            'fizzbuzz' if i % 15 == 0 else
            'buzz' if i % 5 == 0 else
            'fizz' if i % 3 == 0 else
            '',
            i,
        )
        for i in range(n)
    ]
    return pairs

print(f'{fizzbuzz(20) = }')
for w, n in fizzbuzz(20):
    print(f'{n + 1     = }')
    print(f'{w.upper() = }')
from typing import List, Tuple

def fizzbuzz(n) -> List[Tuple[List[str], int]]:
    '''
        Return (words, number) pairs for 0 .. n-1.

        `words` is a list: ['fizz', 'buzz'] for multiples of 15, ['buzz']
        for other multiples of 5, ['fizz'] for other multiples of 3, and
        an empty list otherwise.
    '''
    def words_for(i : int) -> List[str]:
        # A fresh list is built on every call, so no two results share one.
        if i % 15 == 0:
            return ['fizz', 'buzz']
        if i % 5 == 0:
            return ['buzz']
        if i % 3 == 0:
            return ['fizz']
        return []

    return [(words_for(i), i) for i in range(n)]

print(f'{fizzbuzz(20) = }')
for ws, n in fizzbuzz(20):
    if ws:
        print(f'{"".join(ws).upper() = }')
    else:
        print(f'{n + 1               = }')
from typing import List, Tuple

def fizzbuzz(n) -> List[Tuple[List[str], int]]:
    '''
        Return (words, number) pairs for 0 .. n-1.

        The word list is chosen first and appended to the result in one
        place, instead of once per branch.
    '''
    out : List[Tuple[List[str], int]] = []
    for i in range(n):
        words : List[str] = (
            ['fizz', 'buzz'] if i % 15 == 0 else
            ['buzz'] if i % 5 == 0 else
            ['fizz'] if i % 3 == 0 else
            []
        )
        out.append((words, i))
    return out
from typing import List, Tuple

def fizzbuzz(n) -> List[Tuple[List[str], int]]:
    '''
        Return (words, number) pairs for 0 .. n-1.

        The two divisibility tests are independent, so a multiple of 15
        collects both words without needing a separate branch.
    '''
    rv : List[Tuple[List[str], int]] = []
    for x in range(n):
        terms : List[str] = []
        # Test 3 before 5 so that multiples of 15 yield ['fizz', 'buzz'],
        # the same order the branch-per-case versions produce.
        if x % 3 == 0:
            terms.append('fizz')
        if x % 5 == 0:
            terms.append('buzz')
        rv.append((terms, x))
    return rv
from typing import Generator, List, Tuple
from itertools import count

def fizzbuzz() -> Generator[Tuple[List[str], int], None, None]:
    '''
        Yield (words, number) pairs for 0, 1, 2, … without end.

        The caller decides how many items to take (e.g. with
        `itertools.islice`), so no upper bound is passed in.
    '''
    for x in count():
        terms : List[str] = []
        # Test 3 before 5 so that multiples of 15 yield ['fizz', 'buzz'],
        # the same order the list-returning versions produce.
        if x % 3 == 0:
            terms.append('fizz')
        if x % 5 == 0:
            terms.append('buzz')
        yield terms, x
from typing import Generator, Tuple, List
from itertools import count

# Maps each divisor to the word emitted for its multiples.
divisors = {3: 'fizz', 5: 'buzz'}
def fizzbuzz() -> Generator[Tuple[List[str], int], None, None]:
    '''
        Yield (words, number) pairs for 0, 1, 2, … without end.

        The rules come from the module-level `divisors` mapping; words
        appear in that mapping's iteration order.
    '''
    for x in count():
        # `items()` yields (divisor, word) pairs: unpack in that order so
        # that the modulo is taken against the int, not the str.
        terms = [w for d, w in divisors.items() if x % d == 0]
        yield terms, x
from typing import Generator, Dict, List, Tuple
from itertools import count

def fizzbuzz(divs : Dict[int, str]) -> Generator[Tuple[List[str], int], None, None]:
    '''
        Yield (words, number) pairs for 0, 1, 2, … without end.

        `divs` maps each divisor to the word emitted for its multiples;
        words appear in the mapping's iteration order.
    '''
    for i in count():
        words : List[str] = []
        for divisor, word in divs.items():
            if i % divisor == 0:
                words.append(word)
        yield words, i

divisors = {3: 'fizz', 5: 'buzz'}
fizzbuzz(divisors)
from typing import Generator, Dict, List, Tuple, Iterable, Callable
from itertools import count

def fizzbuzz(
    divs : Dict[int, str],
    *,
    count : Callable[[], Iterable[int]] = count,
) -> Generator[Tuple[List[str], int], None, None]:
    '''
        Yield (words, number) pairs for every number produced by `count()`.

        `divs` maps each divisor to the word emitted for its multiples.
        `count` is a zero-argument callable returning the numbers to walk;
        its default is the `count` that was in scope when this function
        was defined, and inside the body the parameter shadows that name.
    '''
    for i in count():
        words : List[str] = []
        for divisor, word in divs.items():
            if i % divisor == 0:
                words.append(word)
        yield words, i

divisors = {3: 'fizz', 5: 'buzz'}
fizzbuzz(divisors)
fizzbuzz(divisors, count=lambda: range(10))
#  fizzbuzz(divisors, count=lambda: (x**2 for x in range(10)))
#  fizzbuzz(divisors, count=lambda: (... for x in range(10)))