# Python @ Two Sigma

## Reproducibility II: Better Notebooks (and Beyond?)
## (a.k.a., Notebook Tricks & Tips)
## (a.k.a., Holiday Grab Bag “Stocking Stuffers”)

In [5]:
# Load the watermark extension, then stamp the notebook with environment details.
%load_ext watermark
# As the recorded output shows, this reports the last-updated date, the
# Python/IPython versions, the versions of the listed packages (or
# "not installed"), and the current git hash and branch.
%watermark -g -b -v -p pandas,numpy,xarray,networkx -d -u

Last updated: 2021-01-15

Python implementation: CPython
Python version       : 3.8.3
IPython version      : 7.16.1

pandas  : 1.0.5
numpy   : 1.18.5
xarray  : not installed
networkx: 2.4

Git hash: 104ada9451aea5f113bf7160f944c81aa4b50abd

Git branch: main



In [9]:
%%time
# The cell magic above has to stay on the first line; it times the whole cell.

from time import sleep
# Sleeping shows up as wall time but hardly any CPU time (see the recorded output).
sleep(2)
print('Hello')

Hello
CPU times: user 5.75 ms, sys: 261 µs, total: 6.01 ms
Wall time: 2 s


In [19]:
%%time
# The cell magic above times the whole cell (about 3 s of wall time in the recorded output).

from time import sleep
sleep(1)
# The line magic times only this single statement (about 1 s) and prints its own report.
%time sleep(1)
sleep(1)

CPU times: user 130 µs, sys: 54 µs, total: 184 µs
Wall time: 1 s
CPU times: user 6.61 ms, sys: 974 µs, total: 7.59 ms
Wall time: 3.01 s


In [None]:
%pip install ...
%pycat ...
%loadext ...
%load ...

In [21]:
%%file sample.csv
aapl,100,123.56
goog,200,543.23

Writing sample.csv


In [None]:
# %load sample.csv
aapl,100,123.56
goog,200,543.23


In [27]:
files = !! ls *.csv

In [30]:
# Iterate over every captured line except the first one.
# NOTE(review): presumably the slice skips the zsh linker warning that the
# recorded `Out` listing shows ahead of the file names; if the shell prints no
# such warning, it drops a real file instead — confirm.
for f in files[1:]:
    ...

In [None]:
# %load bls.py
#!/usr/bin/env python3

from collections import namedtuple
from httpx import AsyncClient
from itertools import groupby
from json import dumps
from pandas import DataFrame, to_numeric, Period, Index

from .utils import asyncnullcontext

# Field names of one series row, in column order. namedtuple() accepts this
# whitespace-separated string and splits it into the individual names.
_fields = '''
series_id
area_type_code
area_code
measure_code
seasonal
srd_code
series_title
footnote_codes
begin_year
begin_period
end_year
end_period
'''

class Series(namedtuple('SeriesBase', _fields)):
    """One row of a tab-separated series listing, plus alternative constructors.

    Every ``from_*`` constructor except ``from_line`` ultimately returns the
    lazy generator produced by ``from_text``.
    """

    @classmethod
    async def from_url(cls, url, *, client=None):
        """Download ``url`` and parse the response text.

        A new ``AsyncClient`` is used unless ``client`` is supplied, in which
        case the client is wrapped with ``asyncnullcontext``.
        NOTE(review): presumably the wrapper lets an existing client be reused
        without being closed here — confirm against ``.utils``.
        """
        if client is None:
            make_context = AsyncClient
        else:
            make_context = asyncnullcontext(client)
        async with make_context() as session:
            reply = await session.get(url)
            return cls.from_text(reply.text)

    @classmethod
    def from_file(cls, file):
        """Parse the full contents of an already-open text file object."""
        contents = file.read()
        return cls.from_text(contents)

    @classmethod
    def from_filename(cls, filename):
        """Open ``filename``, read it completely and parse its contents."""
        # The text is read before the file is closed; only the parsing is lazy.
        with open(filename) as handle:
            return cls.from_file(handle)

    @classmethod
    def from_text(cls, text):
        """Yield one ``Series`` per line of ``text``, skipping the first line.

        The first line is treated as a header and never parsed.
        """
        for row in text.splitlines()[1:]:
            yield cls.from_line(row)

    @classmethod
    def from_line(cls, line):
        """Parse a single tab-separated row into a ``Series``.

        ``series_id`` is stripped of surrounding whitespace and the two year
        columns are converted to ``int``; every other column is kept verbatim.

        Raises:
            ValueError: if the row does not have exactly twelve columns, or a
                year column is not an integer.
        """
        (
            raw_id,
            area_type_code,
            area_code,
            measure_code,
            seasonal,
            srd_code,
            series_title,
            footnote_codes,
            first_year,
            begin_period,
            last_year,
            end_period,
        ) = line.split('\t')
        return cls(
            series_id=raw_id.strip(),
            area_type_code=area_type_code,
            area_code=area_code,
            measure_code=measure_code,
            seasonal=seasonal,
            srd_code=srd_code,
            series_title=series_title,
            footnote_codes=footnote_codes,
            begin_year=int(first_year),
            begin_period=begin_period,
            end_year=int(last_year),
            end_period=end_period,
        )

# Container returned by Result.to_pandas(): the same series and catalog,
# with the raw data replaced by a DataFrame.
ResultDataFrame = namedtuple('ResultDataFrame', 'series catalog dataframe')
class Result(namedtuple('ResultBase', 'series catalog data')):
    """One series' worth of results: its id, its catalog entry and its raw data rows."""

    def to_pandas(self):
        """Convert the raw ``data`` records into a period-indexed DataFrame.

        Each record must provide ``year``, ``period`` and ``value`` entries.

        Returns:
            ResultDataFrame: ``series`` and ``catalog`` unchanged, plus a
            DataFrame with a single ``value`` column whose index, named
            ``period``, holds one monthly ``Period`` per record.
        """
        raw_df   = DataFrame(self.data)
        years    = to_numeric(raw_df['year'])
        # Drop the one-character prefix of the period code and read the rest
        # as the month number.
        # NOTE(review): assumes codes of the form 'M01'..'M12' — confirm.
        months   = to_numeric(raw_df['period'].str[1:])
        index    = Index([Period(year=y, month=m, freq='M') for y, m in zip(years, months)], name='period')
        # Values that cannot be parsed as numbers become NaN instead of raising.
        data     = to_numeric(raw_df['value'], errors='coerce')
        df       = DataFrame(data)
        df.index = index
        return ResultDataFrame(self.series, self.catalog, df)

class Response(namedtuple('ResponseBase', 'request response')):
    """Pairs the originating request with the decoded JSON body of its reply."""

    @classmethod
    def from_response(cls, request, response):
        """Build a ``Response`` from ``request`` and an HTTP response object.

        ``response`` only needs to expose ``.json()``; the decoded body is what
        gets stored in the ``response`` field.
        """
        return cls(request, response.json())

    # Backward-compatible alias for the earlier, misspelled constructor name.
    from_respobnse = from_response

    @property
    def results(self):
        """Yield one ``Result`` per entry under ``['Results']['series']`` of the body."""
        for r in self.response['Results']['series']:
            series = r['seriesID']
            catalog = r['catalog']
            data = r['data']
            yield Result(series, catalog, data)

    def with_client(self, client):
        """Return a copy whose request has been rebound to ``client``.

        The rebinding is delegated to the request's own ``with_client``; the
        request tuple has no ``client`` field, so ``_replace`` cannot be used.
        """
        return self._replace(request=self.request.with_client(client))

class Request(namedtuple('RequestBase', 'series start end key context')):
    """Immutable description of one time-series query against ``url``.

    ``context`` is called with no arguments and must return an async context
    manager yielding the HTTP client. Calling the request instance (and
    awaiting the result) performs the POST.
    """

    base_url = 'https://api.bls.gov/'
    url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'
    headers = {
        'Content-type': 'application/json',
    }

    def __new__(cls, series, start, end=None, key=None, context=None):
        """Create a request.

        ``series`` is materialised into a list, ``end`` falls back to ``start``
        and ``context`` falls back to ``AsyncClient``.
        """
        last_year = start if end is None else end
        members = list(series)
        client_factory = AsyncClient if context is None else context
        return super().__new__(cls, members, start, last_year, key, client_factory)

    def with_key(self, key):
        """Return a copy of this request that carries the given registration key."""
        return self._replace(key=key)

    def with_client(self, client):
        """Return a copy whose context is ``asyncnullcontext(client)``."""
        wrapped = asyncnullcontext(client)
        return self._replace(context=wrapped)

    @property
    def data(self):
        """The request payload as a plain dict, ready to be JSON-encoded."""
        ids = [member.series_id for member in self.series]
        return {
            'seriesid':        ids,
            'registrationkey': self.key,
            'catalog':         'true',
            'startyear':        str(self.start),
            'endyear':          str(self.end),
        }

    async def __call__(self):
        """POST the JSON-encoded payload to ``url`` and wrap the reply in a ``Response``."""
        async with self.context() as session:
            reply = await session.post(self.url, data=dumps(self.data), headers=self.headers)
            return Response.from_response(self, reply)

    def chunked(self, *, size=10):
        """Yield copies of this request, each holding at most ``size`` consecutive series."""
        def bucket(pair):
            # Series at positions 0..size-1 share bucket 0, the next `size` bucket 1, and so on.
            position, _ = pair
            return position // size

        for _, members in groupby(enumerate(self.series), bucket):
            yield self._replace(series=[item for _, item in members])


In [37]:
class A:
    pass

x = A()

In [38]:
x = A()

In [36]:
def func():
    pass

func()

In [63]:
%%writefile lib.py

def func():
    return 1_111_000_000_000 * 2

Overwriting lib.py


In [43]:
%pycat lib.py

In [64]:
from lib import func
func()

2222000000000

In [52]:
from sys import modules
# Remove the cached module object so that the next `import lib` runs lib.py again.
del modules['lib']
# Last expression of the cell: shows that the cache entry is gone (False).
'lib' in modules

False

In [56]:
from importlib import reload

# Re-execute lib.py against the existing module object, then drop the
# temporary `lib` name from the notebook namespace again.
import lib
lib = reload(lib)
del lib

In [59]:
%load_ext autoreload
%autoreload 2

In [68]:
class A:
    def __init__(self, y, z):
        self.y, self.z = y, z

In [66]:
x = A(10, 20)

In [69]:
isinstance(x, A)

False

In [70]:
def f(x):  # outermost frame of the call chain f -> g -> h
    return g(x)

def g(x):  # middle frame: forwards straight to h
    return h(x)

def h(x):  # innermost frame, where the error is raised
    return 1 / x  # ZeroDivisionError when x == 0

f(0)  # fails three frames deep; the following cell inspects the stack with %debug

ZeroDivisionError: division by zero

In [71]:
%debug

> [0;32m<ipython-input-70-dc527b03cfff>[0m(8)[0;36mh[0;34m()[0m
[0;32m      6 [0;31m[0;34m[0m[0m
[0m[0;32m      7 [0;31m[0;32mdef[0m [0mh[0m[0;34m([0m[0mx[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 8 [0;31m    [0;32mreturn[0m [0;36m1[0m [0;34m/[0m [0mx[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      9 [0;31m[0;34m[0m[0m
[0m[0;32m     10 [0;31m[0mf[0m[0;34m([0m[0;36m0[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> up
> [0;32m<ipython-input-70-dc527b03cfff>[0m(5)[0;36mg[0;34m()[0m
[0;32m      3 [0;31m[0;34m[0m[0m
[0m[0;32m      4 [0;31m[0;32mdef[0m [0mg[0m[0;34m([0m[0mx[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 5 [0;31m    [0;32mreturn[0m [0mh[0m[0;34m([0m[0mx[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      6 [0;31m[0;34m[0m[0m
[0m[0;32m      7 [0;31m[0;32mdef[0m [0mh[0m[0;34m([0m[0mx[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[

In [72]:
print(1 + 1)

2


In [76]:
1+1

2

In [78]:
In

['',
 '# Python @ Two Sigma',
 "get_ipython().run_line_magic('watermark', '-g -b -v -p pandas,numpy,xarray,networkx -d -u')",
 "get_ipython().run_line_magic('load_ext', 'watermark')\nget_ipython().run_line_magic('watermark', '-g -b -v -p pandas,numpy,xarray,networkx -d -u')",
 "get_ipython().run_line_magic('pip', 'install watermark')",
 "get_ipython().run_line_magic('load_ext', 'watermark')\nget_ipython().run_line_magic('watermark', '-g -b -v -p pandas,numpy,xarray,networkx -d -u')",
 'from time import sleep\nsleep(2)',
 "get_ipython().run_cell_magic('time', '', '\\nfrom time import sleep\\nsleep(2)\\n')",
 'get_ipython().run_cell_magic(\'time\', \'\', "\\nfrom time import sleep\\nsleep(2)\\nprint(\'Hell\')\\n")',
 'get_ipython().run_cell_magic(\'time\', \'\', "\\nfrom time import sleep\\nsleep(2)\\nprint(\'Hello\')\\n")',
 "get_ipython().system(' ls .')",
 "get_ipython().system(' head -n 5 bls.py')",
 "get_ipython().system(' head -n 15 bls.py')",
 "get_ipython().run_line_magic('pycat'

In [81]:
print('Good Afternoon, Jesse! I hope you are looking forward to the weekend!')

Good Afternoon, Jesse! I hope you are looking forward to the weekend!


In [82]:
In

['',
 '# Python @ Two Sigma',
 "get_ipython().run_line_magic('watermark', '-g -b -v -p pandas,numpy,xarray,networkx -d -u')",
 "get_ipython().run_line_magic('load_ext', 'watermark')\nget_ipython().run_line_magic('watermark', '-g -b -v -p pandas,numpy,xarray,networkx -d -u')",
 "get_ipython().run_line_magic('pip', 'install watermark')",
 "get_ipython().run_line_magic('load_ext', 'watermark')\nget_ipython().run_line_magic('watermark', '-g -b -v -p pandas,numpy,xarray,networkx -d -u')",
 'from time import sleep\nsleep(2)',
 "get_ipython().run_cell_magic('time', '', '\\nfrom time import sleep\\nsleep(2)\\n')",
 'get_ipython().run_cell_magic(\'time\', \'\', "\\nfrom time import sleep\\nsleep(2)\\nprint(\'Hell\')\\n")',
 'get_ipython().run_cell_magic(\'time\', \'\', "\\nfrom time import sleep\\nsleep(2)\\nprint(\'Hello\')\\n")',
 "get_ipython().system(' ls .')",
 "get_ipython().system(' head -n 5 bls.py')",
 "get_ipython().system(' head -n 15 bls.py')",
 "get_ipython().run_line_magic('pycat'

In [83]:
Out

{28: ['/bin/zsh: /opt/anaconda3/lib/libtinfo.so.6: no version information available (required by /bin/zsh)',
  'a.csv',
  'b.csv',
  'c.csv',
  'sample.csv'],
 29: ['a.csv', 'b.csv', 'c.csv', 'sample.csv'],
 40: 10,
 44: 10,
 45: 10,
 46: 10,
 47: 10,
 48: 10,
 49: 10,
 50: 10,
 51: True,
 52: False,
 53: 11000,
 57: 111000000,
 60: 111000000,
 62: 1111000000000,
 64: 2222000000000,
 69: False,
 73: 2,
 74: 32,
 75: 62,
 76: 2,
 77: 32,
 78: ['',
  '# Python @ Two Sigma',
  "get_ipython().run_line_magic('watermark', '-g -b -v -p pandas,numpy,xarray,networkx -d -u')",
  "get_ipython().run_line_magic('load_ext', 'watermark')\nget_ipython().run_line_magic('watermark', '-g -b -v -p pandas,numpy,xarray,networkx -d -u')",
  "get_ipython().run_line_magic('pip', 'install watermark')",
  "get_ipython().run_line_magic('load_ext', 'watermark')\nget_ipython().run_line_magic('watermark', '-g -b -v -p pandas,numpy,xarray,networkx -d -u')",
  'from time import sleep\nsleep(2)',
  "get_ipython().run_c

In [100]:
from numpy.random import normal, choice
from string import ascii_lowercase
from pandas import DataFrame

# Throwaway example frame: ten random four-letter "tickers" with random "prices".
# NOTE(review): no seed is set, so the values change on every run.
df = DataFrame({
    # Draw a 10x4 grid of single lowercase letters, reinterpret each row of
    # four one-character strings as one four-character string, then flatten.
    'ticker': choice([*ascii_lowercase], size=(10, 4)).view('<U4').ravel(),
    # Ten draws from normal() with its default parameters.
    'price': normal(size=10),
})
# Last expression of the cell: rendered as the table below.
df

Unnamed: 0,ticker,price
0,gxam,-1.252182
1,odhu,0.105944
2,otcz,-0.050938
3,hcpz,0.288723
4,srqd,-1.066613
5,vpqq,-0.929788
6,vnid,1.043725
7,bykl,-0.830025
8,gbbg,0.627247
9,akhn,-1.004481


In [95]:
del df

In [96]:
from numpy.random import normal
normal(size=10_000_000)

array([-1.74741923,  1.83171975,  0.30292035, ...,  0.06732917,
        0.37078384, -1.46130829])

In [97]:
Out[96].nbytes

80000000

In [101]:
%xdel df

In [102]:
Out.clear()