ts-python

How do I make Matplotlib look good?

Date: Friday, Oct 4, 2024 at 09:30 US/Eastern

Topics: matplotlib

Communicate your insights with stunning charts in Matplotlib.

Creating a data visualization is easy, but crafting one that effectively communicates a message and looks great requires deliberate refinement.

Join us for our seminar, “How do I make Matplotlib look good?” where you’ll master Matplotlib’s API and learn how to create charts that impress. We’ll guide you through the essential mechanics of chart building, ensuring your visualizations are not only consistent and aesthetic but also intuitive. Additionally, we’ll explore classic data visualization principles, like the “data-to-ink” ratio, and demonstrate how to apply these concepts in Matplotlib to craft charts that deliver a clear and concise message.

Don’t miss this opportunity to elevate the way you share insights.

pip install pandas numpy scipy matplotlib

Notes

How do I make Matplotlib look good?

from matplotlib.pyplot import figure, show, rc
from textwrap import fill

# Title slide: an empty Figure carrying two lines of text, both horizontally
# centred on x=0.4.
fig = figure()
slide_lines = (
    # y,   text,                                    size
    (.5,  "How do I make Matplotlib look good?", 40),
    (.45, "Cameron Riddell",                     20),
)
for y_pos, caption, font_size in slide_lines:
    fig.text(.4, y_pos, caption, size=font_size, ha='center')

show()
fig.savefig('images/welcome.png', bbox_inches='tight')

Matplotlib is Object Oriented

Primitives

Figure → Axes (often organized into columns/rows called subplots)

Don’t take my word for it, here’s the documentation

# adapted from https://matplotlib.org/stable/gallery/showcase/anatomy.html

import matplotlib.pyplot as plt
import numpy as np

from matplotlib.patches import Circle
from matplotlib.patheffects import withStroke
from matplotlib.ticker import AutoMinorLocator, MultipleLocator

# RGB kept as a list so an alpha channel can be appended with `+ [alpha]`.
royal_blue = [0, 20/256, 82/256]


# Fixed seed: the scattered points come out the same on every run.
np.random.seed(19680801)

X = np.linspace(0.5, 3.5, 100)
Y1 = 3+np.cos(X)
Y2 = 1+np.cos(1+X/0.75)/2
Y3 = np.random.uniform(Y1, Y2, len(X))

fig, ax = plt.subplots()

# Same tick layout on both axes: a major tick every 1.0 and an automatic
# minor locator created with n=4.  Only the x axis labels its minor ticks.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_locator(MultipleLocator(1.000))
    axis.set_minor_locator(AutoMinorLocator(4))
ax.xaxis.set_minor_formatter("{x:.2f}")

ax.set_xlim(0, 4)
ax.set_ylim(0, 4)

major_tick_style = dict(width=1.0, length=10, labelsize=14)
minor_tick_style = dict(width=1.0, length=5, labelsize=10, labelcolor='0.25')
ax.tick_params(which='major', **major_tick_style)
ax.tick_params(which='minor', **minor_tick_style)

ax.grid(linestyle="--", linewidth=0.5, color='.25', zorder=-10)

# Two labelled curves plus every third random point drawn as hollow squares.
ax.plot(X, Y1, c='C0', lw=2.5, label="Blue signal", zorder=10)
ax.plot(X, Y2, c='C1', lw=2.5, label="Orange signal")
every_third = slice(None, None, 3)
ax.plot(
    X[every_third], Y3[every_third],
    linewidth=0,
    marker='s', markersize=9,
    markerfacecolor='none', markeredgecolor='C4', markeredgewidth=2.5,
)

label_size = 14
ax.set_title("Anatomy of a figure", fontsize=20, verticalalignment='bottom')
ax.set_xlabel("x Axis label", fontsize=label_size)
ax.set_ylabel("y Axis label", fontsize=label_size)
ax.legend(loc="upper right", fontsize=label_size)


# Annotate the figure

def annotate(x, y, text, code):
    """Mark a spot on the module-level ``ax`` with a circle and two captions.

    A circle is added at ``(x, y)``.  ``text`` is written just below it in
    bold italic monospace, and ``code`` is anchored underneath ``text``.
    Both captions are drawn twice: once in white with a white stroke, then
    once more in their real colour with no path effect.

    Returns a ``(circle, text_artist, code_artist)`` tuple whose two text
    artists are the ones created by the second (un-stroked) pass.
    """
    # Circle marker
    ring = Circle(
        (x, y), radius=0.15, clip_on=False, zorder=10, linewidth=2.5,
        edgecolor=royal_blue + [0.6], facecolor='none',
        path_effects=[withStroke(linewidth=7, foreground='white')],
    )
    ax.add_artist(ring)

    # The stroke acts as a background for the captions.  It is drawn in a
    # separate pass from the coloured text so that one caption's stroke
    # cannot clip another caption.
    stroked_pass = [withStroke(linewidth=7, foreground='white')]
    plain_pass = []
    for effects in (stroked_pass, plain_pass):
        if effects:
            name_color = code_color = 'white'
        else:
            name_color, code_color = royal_blue, 'black'

        name_artist = ax.text(
            x, y-0.2, text, zorder=100,
            ha='center', va='top', weight='bold', color=name_color,
            style='italic', fontfamily='monospace',
            path_effects=effects, backgroundcolor='white',
        )

        # Positioned relative to the caption above it: bottom-centre of
        # ``name_artist``, shifted 5 points down.
        code_artist = ax.annotate(
            code,
            xy=(0.5, 0), xycoords=name_artist,
            xytext=(0, -5), textcoords='offset points',
            zorder=100,
            ha='center', va='top', weight='normal', color=code_color,
            fontfamily='monospace', fontsize='medium',
            path_effects=effects, backgroundcolor='white',
        )
    return ring, name_artist, code_artist

def gen():
    """Lazily yield the result of one ``annotate`` call per figure part.

    Each callout is only created when the generator is advanced, so the
    caller controls when it appears.
    """
    callouts = (
        # x,    y,    label,    code
        (4,    4.5,  "Figure", "plt.figure"),
        (2.5,  0.55, "Axes",   "fig.subplots"),
        (4.0,  0.7,  "Spine",  "ax.spines"),
        (0.65, 0.01, "x Axis", "ax.xaxis"),
        (0,    0.36, "y Axis", "ax.yaxis"),
        (3.00, 3.00, "Grid",   "ax.grid"),
    )
    for callout in callouts:
        yield annotate(*callout)

from matplotlib.pyplot import waitforbuttonpress
# Walk through the callouts one by one: every pass of the loop advances
# gen(), which creates the next annotation.
for c, *_ in gen():
    c.set_edgecolor('red')  # recolour the circle returned by annotate()
    waitforbuttonpress()    # pause here until a key or mouse button is pressed
    c.remove()              # only the circle is removed; the text artists stay

fig.savefig('images/anatomyoffigure.png', bbox_inches='tight')

Setting Parameters: rcParams & Object Orientation

from matplotlib.pyplot import rcParams, rc, subplots, show
from pprint import pprint

# pprint(rcParams)

# # Set default parameters
# rc('font', size=30)
# rcParams['font.size'] = 30

# rc('axes.spines', right=False, top=False)

# Object-oriented counterpart of the commented-out rc calls above: the title
# size and spine visibility are set on this figure's own Artists.
fig, ax = subplots()
ax.set_title('This is My Title', size=30)

ax.spines[['right', 'top']].set_visible(False)

# # Programmatically set all text to 2× the base
# from matplotlib.text import Text
# for t in fig.findobj(Text):
#     t.set_size(t.get_size() * 2)

show()

Aside: Ticks & Tickformatting

from functools import partial
from flexitext import flexitext
from textwrap import fill
from numpy import pi, sin, cos, linspace, round as np_round
from matplotlib.ticker import (
    AutoLocator, FixedLocator, LinearLocator,
    MaxNLocator, MultipleLocator, IndexLocator,
)
from matplotlib.pyplot import subplots, show, rc, waitforbuttonpress
from matplotlib.transforms import IdentityTransform, blended_transform_factory

# Defaults for everything created below: font size 18, and the top, right and
# left spines switched off.
rc('font', size=18)
rc('axes.spines', top=False, right=False, left=False)

def twinner(orig_ax):
    """Yield ``orig_ax`` first, then an endless stream of its ``twiny()`` Axes.

    Each twin is given the x-limits that ``orig_ax`` reports at the moment
    the twin is created.  The generator never finishes on its own; pair it
    with a finite iterable (e.g. via ``zip``) to bound it.
    """
    yield orig_ax
    while True:
        twin = orig_ax.twiny()
        limits = orig_ax.get_xlim()
        twin.set_xlim(*limits)
        yield twin

# Zero-argument factories for the Locators being compared.  ``partial`` is used
# (even without arguments) so that ``.func`` and ``.args`` can be read back
# when building each row's caption.
locators = [
    partial(AutoLocator),
    partial(FixedLocator, [0, .5*pi, 2*pi]),
    partial(LinearLocator, 5),
    partial(MaxNLocator, 5),
    partial(MultipleLocator, pi, .5*pi),
]

xs = linspace(0, 2*pi, 200)
# The Axes' bottom edge sits at 0.6 of the figure, leaving the area below it
# for the stacked x-axes.
fig, ax = subplots(gridspec_kw={'bottom': .6})
ax.plot(xs, sin(xs), label='sin', lw=3)
ax.plot(xs, cos(xs), label='cos', lw=3)
ax.yaxis.set_tick_params(labelleft=False, left=False, labelright=True)
ax.yaxis.set_major_locator(MultipleLocator(1))
ax.margins(y=.1)

ax.set_title('Fixed & Programmatic Tick Location in matplotlib', loc='left', weight='semibold')
offset = 10  # outward shift of the current row's bottom spine; grows by 90 per row
# NOTE(review): `bottom` is never read below -- confirm whether this call is
# still needed (e.g. for a layout side effect) before deleting it.
bottom = ax.get_tightbbox().y0
# One row per locator.  twinner() hands back the original Axes first and a
# fresh twiny() Axes for every later row; zip() stops it once the locators run
# out, so no surplus twin is created.
for loc, curax in zip(locators, twinner(ax)):
    curax.xaxis.set_major_locator(loc()) # AutoLocator, FixedLocator, LinearLocator, ...
    # curax.xaxis.set_minor_locator(loc()) # AutoLocator, FixedLocator, LinearLocator, ...

    # Label ticks as multiples of pi.
    curax.xaxis.set_major_formatter(lambda x, pos: fr'{x/pi:.2f}$\pi$')
    curax.tick_params(top=False, labeltop=False, labelbottom=True, bottom=True)

    # Caption: "<LocatorName>(<args rounded to 2 decimals>)" followed by the
    # first sentence of the locator class's docstring.  Each argument is
    # converted back to its original type after rounding.
    args = tuple(type(arg)(np_round(arg, 2).tolist()) for arg in loc.args)
    docs = fill(
        ' '.join(loc.func.__doc__.strip().split('.')[0].split()), width=120
    )
    text = f'<color: red>{loc.func.__name__}{args!r}\n</><size: small>{docs}.</>'
    curax.xaxis.set_label_position('bottom')
    curax.spines['bottom'].set_position(('outward', offset))
    bbox = flexitext(
        x=.01, y=0, s=text, ax=curax, va='bottom',
    )
    # Re-anchor the caption: x in Axes fraction, y in the bottom spine's own
    # transform (presumably so the caption follows the shifted spine).
    bbox.xycoords = bbox.boxcoords = (curax.transAxes, curax.spines['bottom'].get_spine_transform())
    offset += 90
    waitforbuttonpress()

fig.savefig('images/ticklocators_formatters.png', bbox_inches='tight')

Matplotlib Has Coordinates

Coordinate Spaces: values → … → screen

from pandas import DataFrame, date_range
from numpy.random import default_rng

from matplotlib.pyplot import subplots, show, figure
from matplotlib.transforms import blended_transform_factory

rng = default_rng(0)
dates = date_range('2000-01-01', periods=365)

# column -> (starting level, mean daily step, step std).  The random draws
# happen in this order, so reordering the entries changes the numbers.
store_walks = {
    'Store A': (9_000, 1, 10),
    'Store B': (8_000, 5, 20),
    'Store C': (10_000, -.2, 30),
}
df = DataFrame(
    {
        store: start + rng.normal(drift, scale=spread, size=len(dates)).cumsum()
        for store, (start, drift, spread) in store_walks.items()
    },
    index=dates,
)

fig = figure()

## add_axes          (Figure proportional coordinates)
#             x   y   w   h
axes_rect = [.1, .1, .5, .8]
ax = fig.add_axes(axes_rect)

## plotting data     (data coordinates)
for store in df.columns:
    ax.plot(df.index, df[store], label=store)

## place legend      (Axes proportional coordinates)
# ax.legend(
#     loc='upper left',
#     bbox_to_anchor=(1, .9)
# )

## place annotation  (Axes proportional coordinates, Figure points, Axes data, blended)
from pandas import Timestamp

ax.transAxes # proportional coordinate of the Axes
ax.transData # data coordinate of the Axes

# The annotated point is given in data coordinates; the text position is
# given in Axes-proportional coordinates via textcoords.
annotated_point = (Timestamp('2000-11-15'), 8900)
ax.annotate(
    'hello world',
    xy=annotated_point,
    xytext=(1, .6), textcoords=ax.transAxes,
    arrowprops={'facecolor': 'black', 'shrink': .05},
)

show()

Conceptual Review

  1. Matplotlib is Object Oriented and hierarchical
  2. Has default settings (rcParams) and Artist-specific settings (OO)
    • x/y tick positions/labels are set via Locators and Formatters
  3. Uses various coordinate systems to place things onto the Figure

But, How do I actually Make it Look Better?

Eliminate Chart Junk — Edward Tufte.

data-to-ink ratio

from matplotlib.pyplot import subplots, show, rc
from pandas import DataFrame

# Starting point for the clean-up below: a horizontal bar chart with axis
# labels, thick y tick marks and both grids switched on.
cities = ['San Francisco', 'Los Angeles', 'Austin', 'Chicago', 'Baltimore', 'New York City']
temperatures = [70, 90, 91, 62, 68, 58]
df = DataFrame({'city': cities, 'temperature': temperatures})

fig, ax = subplots()
ax.set_title('Austin is the Hottest City in the US', loc='left')

ax.set_xlabel('Temperature (°F)')
ax.set_ylabel('City')

bc = ax.barh(y=df['city'], width=df['temperature'])

ax.yaxis.set_tick_params(width=2, length=5)
for axis in (ax.yaxis, ax.xaxis):
    axis.grid()
fig.savefig('images/clutteredmess.png', bbox_inches='tight')

def improvements():
    """Strip chart junk from the module-level ``ax`` one step at a time.

    Generator: yields ``None`` once before anything is changed and once
    after every step, so the caller can pause (for example on a key press)
    between steps.  Each step announces itself by setting the Axes title to
    the step name, styled as a bold yellow banner.

    Reads the module-level ``ax`` and ``df``; mutates ``ax`` in place.
    """
    title_kwargs = dict(weight='bold', bbox={'facecolor': 'yellow'}, size='xx-large', va='bottom')
    yield

    # Blank the left-aligned headline; from here on the step banner is the title.
    ax.set_title('', loc='left')
    ax.set_title('Remove Grid', **title_kwargs)
    ax.grid(axis='both', visible=False) # update the grid
    # ax.xaxis.grid(visible=False) # update the grid
    # ax.yaxis.grid(visible=False) # update the grid

    yield

    ax.set_title('Inline (or remove)-X/Y Labels', **title_kwargs)
    # ax.yaxis.set_axis_label
    ax.set_ylabel('') # remove yaxis label
    ax.set_xlabel('') # remove xaxis label

    # update xaxis ticklabel formatting: the unit moves into the tick labels
    ax.xaxis.set_major_formatter(lambda x, pos: f'{x:g}°F')

    yield

    ax.set_title('Remove Axes Spines', **title_kwargs)
    # ax.spines[['top', 'left', 'right', 'bottom']].set_visible(False) # remove spines
    ax.spines[:].set_visible(False) # remove spines

    yield

    ax.set_title('Reduce Margins', **title_kwargs)
    ax.margins(y=0) # reduce extra whitespace within the Axes along the y-axis

    yield

    ax.set_title('Remove Tick Markers', **title_kwargs)
    ax.yaxis.set_tick_params(left=False)   # remove tick markers on yaxis
    ax.xaxis.set_tick_params(bottom=False) # remove tick markers on xaxis
    # ax.tick_params

    yield

    ax.set_title('Sort by Data (for Nominal categories)', **title_kwargs)
    # typically one would do this before the initial plotting, and simply use
    # ax.barh('city', 'temperature', data=df.sort_values('temperature'))
    # however I am updating the bars inplace, so need to reach a bit deeper
    plot_df = df.sort_values('temperature')
    ax.set_yticks(range(len(plot_df)), plot_df['city'])
    for rect, value in zip(ax.containers[0], plot_df['temperature']):
        rect.set_width(value)

    yield

    ax.set_title('Simplify Visual Comparison', **title_kwargs)
    # add bars that exist underneath the currently displayed bars.
    # every background bar is as wide as the largest temperature.
    ax.barh(df['city'], df['temperature'].max(), color='gainsboro', zorder=0)

    yield

    ax.set_title('Inline Data Labels (optional)', **title_kwargs)
    # one may also use `Axes.bar_label` as a shorthand, but this approach is much more flexible
    ax.xaxis.set_tick_params(labelbottom=False)
    # containers[0] holds the original bars (the background bars were added later).
    for rect in ax.containers[0]:
        # anchor at the bar's right edge, vertically centred, nudged 5 points inward
        ax.annotate(
            f'{rect.get_width():g}°F',
            xy=(1, .5), xycoords=rect,
            xytext=(-5, 0), textcoords='offset points',
            ha='right',
            va='center',
            size='large',
            color='white'
        )

    yield


from matplotlib.pyplot import waitforbuttonpress
# Move the left edge of the subplot area to 15% of the figure width
# (presumably to leave room for the long city tick labels).
fig.subplots_adjust(left=.15)
# improvements() yields before its first change and after every step, so each
# key or mouse press advances the chart by exactly one step.
for _ in improvements():
    waitforbuttonpress()
fig.savefig('images/cleansimple.png', bbox_inches='tight')

Intentionality: Context & Guided Attention

from numpy.random import default_rng
from pandas import DataFrame, date_range
rng = default_rng(0)

index = date_range('2000', periods=600, freq='D')

# column -> (starting value, mean of the daily factor, std of the daily factor).
# Each series is the start value times the cumulative product of its factors.
# The draws are made in this order, so reordering the entries changes the data.
walk_specs = {
    'A': (70, 1,     .01),
    'B': (90, 1,     .05),
    'C': (82, 1,     .02),
    'D': (77, 1.001, .01),
    'E': (77, 1,     .01),
}
df = DataFrame(
    {
        name: start * rng.normal(mean, spread, size=index.size).cumprod()
        for name, (start, mean, spread) in walk_specs.items()
    },
    index=index,
)

from matplotlib.pyplot import subplots, rc, show
from flexitext import flexitext

fig, ax = subplots()
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

# df.plot(ax=ax)

## Removal of legend & focused title
## Color & Width to guide attention
ax.margins(x=0)
for label in df.columns:
    s = df[label]
    # Only series 'D' gets colour and weight; everything else is thin grey.
    highlighted = label == 'D'
    color = 'tab:red' if highlighted else 'darkgray'
    lw = 2 if highlighted else 1

    [line] = ax.plot(s.index, s, label=label, color=color, lw=lw)

    # Direct label at the line's last point, in the line's own colour,
    # nudged 5 points to the right.
    last_point = (s.index[-1], s.iloc[-1])
    ax.annotate(
        label,
        xy=last_point,
        xytext=(5, 0), textcoords='offset points',
        va='center',
        color=line.get_color(),
        size='large',
    )

from flexitext import flexitext
# Headline placed at the Axes' top-left; the 'D' reuses the highlight colour.
flexitext(
    x=0, y=1,
    s='<size:x-large>Store <color:tab:red>D</> outperforms all other</>',
    va='bottom', ax=ax,
)

from itertools import islice, pairwise
from matplotlib.dates import DateFormatter, MonthLocator, ConciseDateFormatter
# Major ticks: one per month, used as band edges below (their marks and labels
# are hidden).  Minor ticks: the 15th of every second month, carrying the
# visible labels.
ax.xaxis.set_major_locator(MonthLocator())

loc = MonthLocator(bymonthday=15, interval=2)
concise_formats = ['%b\n%Y', '%b', '%d', '%H:%M', '%H:%M', '%S.%f']
ax.xaxis.set_minor_locator(loc)
ax.xaxis.set_minor_formatter(ConciseDateFormatter(loc, formats=concise_formats))

ax.xaxis.set_tick_params(which='both', bottom=False)
ax.xaxis.set_tick_params(which='major', labelbottom=False)

# Shade every other interval between consecutive major ticks.
tick_intervals = pairwise(ax.get_xticks())
for left, right in islice(tick_intervals, 0, None, 2):
    ax.axvspan(left, right, ymin=0, ymax=1, color='gainsboro', alpha=.3, zorder=0)


show()
fig.savefig('images/focused.png', bbox_inches='tight')

Review

Matplotlib Mechanics

  1. Matplotlib is Object Oriented and hierarchical
  2. Has default settings (rcParams) and Artist-specific settings (OO)
    • x/y tick positions/labels are set via Locators and Formatters
  3. Uses various coordinate systems to place things onto the Figure

Data Viz Concepts

  1. Minimalism
    • Remove unnecessary redundancies (obvious labels, repeated unimportant features)
    • Remove superfluous aesthetics (usually spines and tick markers)
    • Legends increase working memory load (the reader has to keep the key in mind); prefer inline labels
  2. Don’t overuse color → 3-4 colors maximum for unique features.
    • Color vs non-color is a VERY effective way to guide attention
    • Be consistent with your colors, use them in your title, plot, and annotations
    • “Other” categories are your friend.
  3. Know your Audience
    • Don’t share statistical outputs with non-technical audiences
    • Transformed metrics can both help and hurt
    • Aggregated metrics should be avoided unless presenting “all” the data is visually overwhelming