ts-python

Seminar III: “Sunset Boulevard” (better understanding plotting with matplotlib)

Betty Schaefer (Nancy Olson): Oh, the old familiar story. You help a timid little soul cross a crowded street, she turns out to be a multimillionaire and leaves you all her money.

Joe Gillis (William Holden): That’s the trouble with you readers—you know all the plots.

Sunset Boulevard (1950)

Date Time Track Meeting Link
July 2, 2021 9:30 AM EST Fluent, Effective Visualization Seminar III: “Sunset Boulevard”

Audience

These sessions are designed for a broad audience of modelers and software programmers of all backgrounds and skill levels.

Our expected audience should comprise attendees with a…

… or greater!

During this session, we will endeavour to guide our audience to developing…

… and we will share additional tips, tricks, and in-depth guidance on all of these topics!

Abstract

Let’s turn our attention to visualizing data with matplotlib and the underlying mechanics and theory of how the library and API are designed.

In this episode, we’ll take a look at matplotlib’s underlying design, tying it to common visualization tasks we want to perform. We’ll look at the underlying conceptual entities involved—e.g., Figure, Axes, subplots, Tick, Patch—to attempt to build a strong conceptual understanding of how matplotlib constructs a visualization. We’ll bridge this conceptual understanding to common customizations we may want to perform, and use it to structure and categorize our knowledge of the multitude of “conceptual entities” encountered when attempting a complex visualization.

To Be Continued…

Did you enjoy this episode? Did you learn something new that will help you as you continue or begin to use matplotlib in your work?

If so, stay tuned for future episodes, which may…

If there are other related topics you’d like to see covered, please reach out to Diego Torres Quintanilla.

Contents

Notes

# Emit the opening line three times.
for _ in range(3):
    print("Let's get started.")

Via pandas.{DataFrame,Series}.plot or xarray.{DataArray,Dataset}.plot

from numpy import tile, repeat
from numpy.random import default_rng
from pandas import DataFrame, date_range
from pandas.tseries.offsets import Day
from matplotlib.pyplot import show
from string import ascii_lowercase

# Seeded generator so the synthetic data is reproducible.
rng = default_rng(0)

# Draw a 5x4 grid of random lowercase letters, then reinterpret each row of
# four 1-character strings as a single 4-character string: 5 ticker symbols.
tickers = rng.choice([*ascii_lowercase], size=(5, 4)).view('<U4').ravel()
dates = date_range('2020-07-04', periods=365)
# Long-format frame with one row per (date, ticker) pair.  The price is a
# per-ticker starting level (normal draw, floored at 10) plus a cumulative
# sum of unit-normal noise down the date axis, floored at 0.
df = DataFrame({
    'date':   repeat(dates, len(tickers)),
    'ticker': tile(tickers, len(dates)),
    'price':  (tile(
        rng.normal(loc=100, scale=50, size=len(tickers)).clip(10),
        len(dates)
    ) + rng.normal(scale=1, size=(len(dates), len(tickers))).cumsum(axis=0).ravel()).clip(0),
}).set_index(['date', 'ticker']).sort_index()

# One column per rolling window; the rolling mean is computed separately for
# each ticker, on a date-only index (the 'ticker' level is dropped first).
# NOTE(review): the windows come from a set literal, so the column order
# follows set iteration order -- confirm that is acceptable.
smoothed = DataFrame({
    win: df['price'].groupby('ticker').transform(
        lambda df: df.reset_index('ticker', drop=True).rolling(win, min_periods=1).mean()
    )
    for win in {Day(0), Day(7), Day(31)}
})
smoothed.columns.name = 'smoothing'
print(
    smoothed.head(3),
)

# unstack() moves the innermost index level ('ticker') into the columns, so
# every (smoothing, ticker) pair becomes its own plotted column.
smoothed.unstack().plot()
show()
from matplotlib.pyplot import show
from numpy.random import default_rng
from numpy import linspace, arange, multiply, meshgrid
from xarray import DataArray

rng = default_rng(0)

# Ten evenly spaced sample points on [0, 10] along each axis.
xs = linspace(0, 10, 10)
ys = linspace(0, 10, 10)

# Grid values are the product of the squared offsets from 5 along each axis.
grid_x, grid_y = meshgrid((xs - 5)**2, (ys - 5)**2)
data = DataArray(
    multiply(grid_x, grid_y),
    dims=['x', 'y'],
    coords={'x': xs, 'y': ys},
)

print(data)

# Alternative 2-D rendering of the same array:
#  data.plot.pcolormesh()
#  show()

data.plot.surface()
show()
from numpy import tile, repeat
from numpy.random import default_rng
from pandas import DataFrame, date_range, IndexSlice
from pandas.tseries.offsets import Day
from matplotlib.pyplot import show
from string import ascii_lowercase

# Seeded generator so the synthetic data is reproducible.
rng = default_rng(0)

# Five 4-letter ticker symbols built from a 5x4 grid of random letters.
tickers = rng.choice([*ascii_lowercase], size=(5, 4)).view('<U4').ravel()
dates = date_range('2020-07-04', periods=180)
# Long-format frame, one row per (date, ticker) pair, with a random-walk
# price and a volume series.  The volume cumsum runs over the flattened
# 1-D array in construction order, not separately per ticker.
df = DataFrame({
    'date':   repeat(dates, len(tickers)),
    'ticker': tile(tickers, len(dates)),
    'price':  (tile(
        rng.normal(loc=100, scale=50, size=len(tickers)).clip(10),
        len(dates)
    ) + rng.normal(scale=1, size=(len(dates), len(tickers))).cumsum(axis=0).ravel()).clip(0),
    'volume': (1_000 + rng.integers(-50, 50, size=len(dates) * len(tickers)).cumsum(axis=0).ravel()).clip(0)
}).set_index(['date', 'ticker']).sort_index()

# Per-ticker rolling means of the price, one column per window.
smoothed = DataFrame({
    win: df['price'].groupby('ticker').transform(
        lambda df: df.reset_index('ticker', drop=True).rolling(win, min_periods=1).mean()
    )
    for win in {Day(0), Day(7), Day(31)}
})
smoothed.columns.name = 'smoothing'

# The commented-out variants below are successive attempts at combining the
# price lines and the volume bars; uncomment one at a time to compare them.

#  smoothed.unstack().plot()
#  df[['volume']].unstack().plot.bar()

#  ax = smoothed.unstack().plot()
#  df[['volume']].unstack().plot.bar(ax=ax)

#  ax = smoothed.unstack().plot()
#  df[['volume']].unstack().plot.bar(ax=ax, secondary_y=True)

#  loc = IndexSlice[:, 'evqx', :]
#  smoothed.loc[loc].unstack().plot()
#  df[['volume']].loc[loc].unstack().plot.bar()

#  loc = IndexSlice[:, 'evqx', :]
#  smoothed.loc[loc].plot()
#  df[['volume']].loc[loc].plot.bar()

#  loc = IndexSlice[:, 'evqx', :]
#  ax = smoothed.loc[loc].plot()
#  df[['volume']].loc[loc].plot.bar(ax=ax, secondary_y=True)

# Active variant: first ten dates of one ticker, volume as bars with the
# smoothed prices drawn on a secondary y axis of the same Axes.
# NOTE(review): 'evqx' is presumably one of the tickers produced by seed 0 --
# confirm against the generated `tickers`.
loc = IndexSlice[dates[:10], 'evqx', :]
ax = df[['volume']].loc[loc].reset_index('ticker').plot.bar()
smoothed.loc[loc].reset_index('ticker').plot(ax=ax, secondary_y=True)

show()

Via .pyplot

from matplotlib.pyplot import plot, show
from numpy import linspace

# Sample y = x**2 at 100 evenly spaced points on [-10, 10].
xs = linspace(-10, 10, 100)
ys = xs ** 2
# Module-level pyplot call: no Figure or Axes is created explicitly here.
plot(xs, ys)

show()
from matplotlib.pyplot import hist, show
from numpy.random import default_rng
# Seeded generator so the sample is reproducible.
rng = default_rng(0)

# 100,000 standard-normal draws, binned into 100 bars.
xs = rng.normal(size=100_000)
hist(xs, bins=100)
show()
from matplotlib.pyplot import hist, show
from numpy.random import default_rng
from numpy import vstack
rng = default_rng(0)

# Three standard-normal samples drawn one after another from the same
# generator, arranged as the three columns of a (10_000, 3) array.
samples = [rng.normal(size=10_000) for _ in range(3)]
xs = vstack(samples).T

# Each column is one dataset; the bars are stacked on top of each other.
hist(xs, bins=100, stacked=True)
show()
from matplotlib import rcParams
# Switch text rendering to LaTeX before any text is drawn.
# NOTE(review): presumably requires a working LaTeX installation -- confirm.
rcParams['text.usetex'] = True

from matplotlib.pyplot import subplots, show, title, suptitle, legend
from numpy import linspace

xs = linspace(-10, 10, 100)
# Map each subplot title (raw strings containing LaTeX math) to its y values.
ys = {
    r'parabola: ${\frac{1}{2}x^2}$':       .5 * xs ** 2,
    r'cubic parabola: ${\frac{1}{2}x^3}$': .5 * xs ** 3,
}

# One row, two columns, with a shared y axis.
fig, axes = subplots(1, 2, constrained_layout=True, sharey=True)
suptitle(r'Polynomials')

# Pair each Axes with one curve.  The title is set through the Axes method;
# the commented line is the module-level pyplot call for comparison.
for ax, (title_, values) in zip(axes, ys.items()):
    #  title(title_)
    ax.set_title(title_)
    ax.plot(xs, values)

show()
  1. “implicit” API vs “explicit” API
  2. “object-oriented” API vs “function-based” API
  3. “Backend-layer” vs “scripting-layer” vs “Artist-layer”

“Figures” and “Artists”

from matplotlib.pyplot import figure, show
from inspect import getmodule
from numpy import linspace

xs = linspace(-10, 10, 1_000)
ys = xs ** 2

# Create an empty Figure through pyplot; the commented lines are alternative
# size/dpi settings to try.
fig = figure()
#  fig = figure(figsize=(1200, 720))
#  fig = figure(dpi=(dpi := 72), figsize=(1200/dpi, 700/dpi))

# Inspect the Figure: its repr, type, defining module, attributes, and its
# list of Axes (nothing has been added to the Figure at this point).
print(f'{fig            = }')
print(f'{type(fig)      = }')
print(f'{getmodule(fig) = }')
print(f'{dir(fig)       = }')
print(f'{fig.axes       = }')

#  show()
from matplotlib.figure import Figure
from matplotlib.axes import Axes
from matplotlib.pyplot import show

from numpy import linspace

xs = linspace(-10, 10, 1_000)
ys = xs ** 2

# Build the Figure by instantiating the class directly instead of going
# through pyplot, then add an Axes that fills the whole Figure.
fig = Figure()
fig.add_axes([0, 0, 1, 1])

print(f'{fig              = }')
print(f'{fig.axes         = }')
print(f'{fig.axes[0].plot = }')

fig.axes[0].plot(xs, ys)
# NOTE(review): this Figure was not created by pyplot, so pyplot's show()
# (commented out) does not know about it, and fig.show() may raise because no
# figure manager is attached -- presumably that failure is the point of this
# demo; confirm.
#  show()
fig.show()
from matplotlib import get_backend, cbook
from matplotlib.figure import Figure
from matplotlib.axes import Axes
from numpy import linspace
from importlib import import_module

xs = linspace(-10, 10, 1_000)
ys = xs ** 2

# Figure, Axes and line are all created without pyplot.
fig = Figure()
ax = fig.add_subplot(1, 1, 1)
ln = ax.plot(xs, ys)

# Import the module of the currently selected backend and wire the Figure to
# a canvas and a manager by hand.
# NOTE(review): cbook._backend_module_name is a private helper (leading
# underscore) and may not exist in every matplotlib version -- confirm.
backend = import_module(cbook._backend_module_name(get_backend()))
FigureCanvas, FigureManager = backend.FigureCanvas, backend.FigureManager
# The second argument is the manager's figure number.
manager = FigureManager(FigureCanvas(fig), 1)

backend.show()
from matplotlib.pyplot import subplots, show
from numpy import linspace

xs = linspace(-10, 10, 1_000)
ys = xs ** 2

# Variant: pyplot.subplots returns (Figure, Axes); the Figure is discarded.
_, ax = subplots(1, 1)
ax.plot(xs, ys)
show()
from matplotlib.pyplot import figure, show
from numpy import linspace

xs = linspace(-10, 10, 1_000)
ys = xs ** 2

# Variant: create the Figure first, then ask it for a 1x1 grid of Axes.
fig = figure()
ax = fig.subplots(1, 1)
ax.plot(xs, ys)
show()
from matplotlib.pyplot import figure, axes, show
from numpy import linspace

xs = linspace(-10, 10, 1_000)
ys = xs ** 2

# Variant: module-level pyplot.axes() call; the Figure is not passed to it.
fig = figure()
ax = axes()
ax.plot(xs, ys)
show()
from matplotlib.pyplot import figure, show
from numpy import linspace

xs = linspace(-10, 10, 1_000)
ys = xs ** 2

# Variant: add an Axes from an explicit rectangle; [0, 0, 1, 1] spans the
# full Figure in figure-fraction coordinates.
fig = figure()
ax = fig.add_axes([0, 0, 1, 1])
ax.plot(xs, ys)
show()
from matplotlib.pyplot import figure, show
from numpy import linspace

xs = linspace(-10, 10, 1_000)
ys = xs ** 2

# Variant: create a 1x1 GridSpec on the Figure and let it build the Axes.
fig = figure()
gs = fig.add_gridspec(1, 1)
axs = gs.subplots()
axs.plot(xs, ys)
show()
from matplotlib.pyplot import figure, show
from numpy import linspace

xs = linspace(-10, 10, 1_000)
ys = xs ** 2

fig = figure()
ax = fig.subplots(1, 1)
ax.plot(xs, ys)
# The Axes title is itself an object with its own methods; set its text
# through that object rather than through an Axes-level helper.
print(f'{type(ax)          = }')
print(f'{ax.title          = }')
print(f'{ax.title.set_text = }')
ax.title.set_text('Parabola')

# The two axis objects hanging off the Axes.
print(f'{ax.xaxis          = }')
print(f'{ax.yaxis          = }')

# Every object inspected above exposes a draw method.
print(f'{fig.draw          = }')
print(f'{ax.draw           = }')
print(f'{ax.title.draw     = }')
print(f'{ax.xaxis.draw     = }')
print(f'{ax.yaxis.draw     = }')

#  show()

“Artists” and “Patches”

from matplotlib.pyplot import hist
# help() writes the documentation itself and returns None, so wrapping it in
# a self-documenting f-string only appended a stray "help(hist) = None" line.
help(hist)
from matplotlib.pyplot import figure, show
from matplotlib.patches import Circle

fig = figure()

# Circle attached to the Figure itself rather than to an Axes.
# Other circles to try:
#  Circle((0, 0)),
#  Circle((0, 0), radius=.5),
circle = Circle((.5, .5), radius=.25)
fig.add_artist(circle)

show()
from matplotlib.pyplot import figure, show
from matplotlib.patches import Circle

fig = figure()
# Layout: 'a' spans the left column, 'b' and 'c' are stacked on the right.
axes = fig.subplot_mosaic('''
    ab
    ac
''')

# Same circle in every Axes; only the colour differs.
for label, colour in (('a', 'r'), ('b', 'g'), ('c', 'b')):
    axes[label].add_patch(Circle((.5, .5), radius=.25, color=colour))

show()
from matplotlib import patches 
from pprint import pprint
# Collect every class exposed by matplotlib.patches that derives from Patch
# (Patch itself included), keyed by attribute name.
patch_classes = {}
for name in dir(patches):
    obj = getattr(patches, name)
    if isinstance(obj, type) and issubclass(obj, patches.Patch):
        patch_classes[name] = obj
pprint(patch_classes)
from matplotlib.text import Text
from matplotlib.patches import Polygon

# Show each class and its method resolution order to compare their ancestry.
print(f'{Text            = }')
print(f'{Text.__mro__    = }')

print(f'{Polygon         = }')
print(f'{Polygon.__mro__ = }')
from matplotlib.patches import Patch
# Print the built-in documentation for the Patch base class.
help(Patch)

Bringing it All Together

from matplotlib.pyplot import subplots, show
from numpy import linspace

xs = linspace(-10, 10, 100)
ys = xs ** 2

fig, ax = subplots()

# Keep the return value of plot() so the created artist(s) can be inspected.
ln = ax.plot(xs, ys, label='x²')
print(f'{ln = }')

# The legend is created here, before the histogram below is drawn.
leg = ax.legend(loc='upper right')
print(f'{leg       = }')
print(f'{dir(leg)  = }')
print(f'{leg.texts = }')

#  ax = ax.twiny()

# Keep only the last value returned by hist(); its `.patches` holds the
# individual bars that are modified below.
*_, cont = ax.hist(ys, bins=10)
print(f'{cont = }')

#  print(f'{cont.patches = }')
#  print(f'{cont.patches[2] = }')
#  print(f'{cont.patches[2].fill = }')

# Turn off the fill of the third and sixth bars.
cont.patches[2].fill = False
cont.patches[5].fill = False

show()
from matplotlib.pyplot import figure
from numpy import linspace
from subprocess import run

xs = linspace(-10, 10, 100)
ys = xs ** 2

fig = figure()
ax = fig.subplots()
ax.plot(xs, ys)

# Render to a file instead of a window, then open the file with the external
# `eog` program.
# NOTE(review): the hard-coded /tmp path and `eog` presumably assume a Linux
# desktop -- confirm before running elsewhere.
fig.savefig('/tmp/test.png')
run('eog /tmp/test.png'.split())
from matplotlib.pyplot import figure
from numpy import linspace

xs = linspace(-10, 10, 100)
ys = xs ** 2

fig = figure()
ax = fig.subplots()
ax.plot(xs, ys)

# Inspect the link between a Figure and its canvas; the last line checks
# whether the canvas points back at the very same Figure object.
print(f'{fig                      = }')
print(f'{fig.canvas               = }')
print(f'{fig.canvas.figure        = }')
print(f'{fig.canvas.figure is fig = }')
from matplotlib.pyplot import figure, show
from numpy import linspace

# Parabola on a single Axes; the click handler defined below draws on `ax`
# and redraws through `fig`.
xs = linspace(-10, 10, 100)
ys = xs ** 2

fig = figure()
ax = fig.subplots()
ax.plot(xs, ys)

def onclick(event):
    """Label the clicked data point with a heart and its rounded coordinates.

    ``event`` is the object the canvas passes to 'button_press_event'
    callbacks; only its ``xdata`` and ``ydata`` attributes are read.
    """
    # A click outside the Axes carries no data coordinates (xdata/ydata are
    # None); round(None, 2) would raise TypeError, so ignore such clicks.
    if event.xdata is None or event.ydata is None:
        return
    ax.text(event.xdata, event.ydata,
            f'\N{heavy black heart} ({round(event.xdata, 2)}, {round(event.ydata, 2)})')
    # Redraw so the new label shows up right away.
    fig.canvas.draw()

# Run onclick for every mouse-button press on this figure's canvas.
fig.canvas.mpl_connect('button_press_event', onclick)

show()
from matplotlib.pyplot import figure, show
from numpy import linspace

xs = linspace(-10, 10, 100)
ys = xs ** 2

fig = figure()
ax = fig.subplots()
ax.plot(xs, ys)

# Semi-transparent vertical band from x=0 to x=2, plus a text label; both
# objects are kept so the click handler below can move them.
hl = ax.axvspan(0, 2, alpha=.5)
annot = ax.annotate('region', (.5, 100))

def onclick(event):
    """Re-centre the highlighted band and its label on the clicked x position.

    ``event`` is the object the canvas passes to 'button_press_event'
    callbacks; only its ``xdata`` attribute is read.
    """
    # A click outside the Axes has xdata None; the arithmetic below would
    # raise TypeError, so ignore such clicks.
    if event.xdata is None:
        return
    # Rewrite the x column of the band's vertex array in place: the first two
    # rows and the last row get the left edge, the rows in between the right
    # edge (band width 2, centred on the click).
    # NOTE(review): this relies on `hl.xy` being a mutable vertex array;
    # newer matplotlib releases may return a different patch type from
    # axvspan -- confirm against the installed version.
    hl.xy[:2, 0] = event.xdata - 1
    hl.xy[2:, 0] = event.xdata + 1
    hl.xy[-1, 0] = event.xdata - 1
    annot.set_x(event.xdata - .5)
    fig.canvas.draw()
# Run onclick for every mouse-button press on this figure's canvas.
fig.canvas.mpl_connect('button_press_event', onclick)

show()
from numpy import tile, repeat
from numpy.random import default_rng
from pandas import DataFrame, date_range, IndexSlice, Series
from pandas.tseries.offsets import Day
from matplotlib.pyplot import show, figure
from matplotlib.animation import FuncAnimation
from matplotlib.dates import DateFormatter, MonthLocator
from string import ascii_lowercase
from collections import namedtuple

# Single-field named tuple that wraps each rolling window for use as a
# column label.
RollingWindow = namedtuple('RollingWindow', 'size')

# Seeded generator so the synthetic data is reproducible.
rng = default_rng(0)

# Five 4-letter ticker symbols built from a 5x4 grid of random letters.
tickers = rng.choice([*ascii_lowercase], size=(5, 4)).view('<U4').ravel()
dates = date_range('2020-07-04', periods=365)
# Long-format frame, one row per (date, ticker) pair, with a random-walk price.
df = DataFrame({
    'date':   repeat(dates, len(tickers)),
    'ticker': tile(tickers, len(dates)),
    'price':  (tile(
        rng.normal(loc=100, scale=50, size=len(tickers)).clip(10),
        len(dates)
    ) + rng.normal(scale=1, size=(len(dates), len(tickers))).cumsum(axis=0).ravel()).clip(0),
}).set_index(['date', 'ticker']).sort_index()

# Rolling means of a single ticker's price, one column per window.
# NOTE(review): 'evqx' is presumably one of the tickers produced by seed 0 --
# confirm against the generated `tickers`.
loc = IndexSlice[:, 'evqx', :]
smoothed = DataFrame({
    RollingWindow(win): df.loc[loc]['price'].rolling(win, min_periods=1).mean()
    for win in {Day(0), Day(7), Day(31)}
})

fig = figure()
ax = fig.subplots()
# One initially empty line per smoothing column; update() fills in the data.
artists = {
    col: art
    for col in smoothed.columns
    for art in ax.plot([], [])
}

# Expanding windows over the index values, used as the animation frames.
# NOTE(review): presumably each frame holds every index value up to that
# point, so the lines grow as the animation advances -- confirm.
frames = Series(smoothed.index).expanding()

def init_func():
    """Fix the axis limits and date ticks, then return the line artists."""
    # Limits cover the whole index range and the full spread of the values.
    first, last = smoothed.index.min(), smoothed.index.max()
    lowest, highest = smoothed.min().min(), smoothed.max().max()
    ax.set_xlim(first, last)
    ax.set_ylim(lowest, highest)

    # One major tick every three months, labelled like 'Jul-2020'.
    x_axis = ax.xaxis
    x_axis.set_major_locator(MonthLocator(interval=3))
    x_axis.set_major_formatter(DateFormatter('%b-%Y'))
    return artists.values()

def update(frame):
    """Set every line's data for one animation frame.

    ``frame`` is one item produced by iterating ``frames``; it is used both
    as the x values and as the row selector into ``smoothed``.  Returns the
    line artists that were updated.
    """
    # The row selection does not depend on the column, so do it once per
    # frame instead of once per artist.
    rows = smoothed.loc[frame]
    for col, art in artists.items():
        art.set_data(frame, rows[col])
    return artists.values()
    
# Keep a reference to the animation object in `anim` while the window is open.
anim = FuncAnimation(
    fig,

    # Called once per item of `frames`; init_func sets up the axes.
    update,
    frames=frames,
    init_func=init_func,

    # Delay between frames (10), redraw only the returned artists (blit),
    # and stop after one pass over the frames.
    interval=10,
    blit=True,
    repeat=False,
)
show()