ts-python

What’s New in pandas 2.0?

Date: Friday, May 31st, 2024 at 09:30 AM US/Eastern

Topics: pandas 2.0, Python

Materials

Recently upgraded to the latest version of pandas?

The pandas 2 changelog consists of over 2000 lines of text, code, and bullet points. While the largest changes revolve around the introduction of the PyArrow backend, there are also a plethora of bug fixes, backwards-incompatible changes, deprecations, and much more to discuss. With all of the new features and updates, it is hard to stay up-to-date with pandas best practices without reading the changelog yourself.

Thankfully, we’ve done that reading for you and have distilled the most important updates and where they will impact your day-to-day work. Join us for “What’s New in pandas 2” to keep up with the best practices in the most veteran DataFrame library in the Python ecosystem.

Notes

print("Let's take a look!")

Setup

import pandas; assert pandas.__version__ == '1.3.5'

print("Let's take a look!")
import pandas; assert pandas.__version__ == '1.4.4'

print("Let's take a look!")
import pandas; assert pandas.__version__ == '1.5.3'

print("Let's take a look!")
import pandas; assert pandas.__version__ == '2.0.3'

print("Let's take a look!")
import pandas; assert pandas.__version__ == '2.1.4'

print("Let's take a look!")
import pandas; assert pandas.__version__ == '2.2.2'

print("Let's take a look!")

Deprecations

.is_monotonic → .is_monotonic_increasing, .is_monotonic_decreasing

Many deprecation warnings have now become errors.

import pandas; assert pandas.__version__ == '1.5.3'

from pandas import Index

idx = Index([0, 10, 200, 3_000])

print(f'{idx.is_monotonic = }')
from pandas import Series
from numpy.random import default_rng

rng = default_rng(0)

s = Series(
    # Alternative indexes to experiment with; only one `index=` line may be
    # active at a time. The walrus binds `idx` before `data=` is evaluated,
    # so `len(idx)` below always matches the active index.
    # index=(idx := [0, 10, 200, 3_000]),
    # index=(idx := [0, 10, 3_000, 200]),
    # index=(idx := [0, 10, 200, 200, 3_000]),
    index=(idx := [0, 10, 200, 3_000, 200]),
    data=rng.integers(-10, +10, size=len(idx)),
)

# Guard before label-based slicing. With the active index above (which ends
# in 3_000, 200) the labels are not in increasing order, so this assertion
# fails before the slice is attempted.
assert s.index.is_monotonic_increasing

# Label-based slice between the labels 10 and 200.
print(
    s.loc[10:200]
)
import pandas; assert pandas.__version__ == '2.0.3'

from pandas import Index

idx = Index([0, 10, 200, 3_000])

print(
    # f'{idx.is_monotonic = }',
    f'{idx.is_monotonic_increasing = }',
    f'{idx.is_monotonic_decreasing = }',
    sep='\n',
)

.iteritems → .items

import pandas; assert pandas.__version__ == '1.5.3'

from pandas import Series, DataFrame, date_range
from numpy.random import default_rng

rng = default_rng(0)

s = Series(
    index=(idx := date_range('2020-01-01', periods=3)),
    data=rng.integers(-10, +10, size=len(idx)),
)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=3)),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.integers(-10, +10, size=len(idx)),
    }
)

print(
    *s.iteritems(),
    sep='\n',
    end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
print(
    *df.iteritems(),
    sep='\n',
    end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import Series, DataFrame, date_range
from numpy.random import default_rng

rng = default_rng(0)

s = Series(
    index=(idx := date_range('2020-01-01', periods=3)),
    data=rng.integers(-10, +10, size=len(idx)),
)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=3)),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.integers(-10, +10, size=len(idx)),
    }
)

print(
    # *s.iteritems(),
    *s.items(),
    sep='\n',
    end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
print(
    # *df.iteritems(),
    *df.items(),
    sep='\n',
    end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
import pandas; assert pandas.__version__ == '1.5.3'

from pandas import Series, DataFrame, date_range
from numpy.random import default_rng
from warnings import catch_warnings, simplefilter

rng = default_rng(0)

s0 = Series(
    index=(idx := date_range('2020-01-01', periods=3)),
    data=rng.integers(-10, +10, size=len(idx)),
)

s1 = Series(
    index=(idx := date_range('2020-01-02', periods=3)),
    data=rng.integers(-10, +10, size=len(idx)),
)

# Update s1 element by element with a Python-level loop over s0.
# Every warning raised inside the block is silenced by the 'ignore' filter.
with catch_warnings():
    simplefilter('ignore')
    for k, v in s0.iteritems():
        # Skip labels of s0 that s1 does not contain.
        if k not in s1: continue
        # Even values contribute their square, odd values their cube.
        if v % 2 == 0:
            s1.loc[k] += v ** 2
        else:
            s1.loc[k] += v ** 3

print(
    s0,
    s1,
    sep='\n',
)
import pandas; assert pandas.__version__ == '1.5.3'

from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where

rng = default_rng(0)

s0 = Series(
    index=(idx := date_range('2020-01-01', periods=3)),
    data=rng.integers(-10, +10, size=len(idx)),
)

s1 = Series(
    index=(idx := date_range('2020-01-02', periods=3)),
    data=rng.integers(-10, +10, size=len(idx)),
)

# Restrict the update to the labels present in both Series.
idx = s0.index.intersection(s1.index)
# Square the even values of s0 and cube the odd ones in a single `where`
# call, re-wrap the result with s0's index, and add it to s1 at the shared
# labels only.
s1.loc[idx] += Series(
    index=s0.index,
    data=where(s0 % 2 == 0, s0 ** 2, s0 ** 3),
).loc[idx]

print(
    s0,
    s1,
    sep='\n',
)

.apply

from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed

rng = default_rng(0)

s = Series(
    index=(idx := date_range('2020-01-01', periods=90*24*60*60, freq='s')),
    data=rng.integers(-10, +10, size=len(idx))
)

with timed('abs(...)'):
    abs(s)
with timed('.apply(abs)'):
    s.apply(abs)
with timed('.apply("abs")'):
    s.apply("abs")
with timed('.apply(np_abs)'):
    s.apply(np_abs)
with timed('.apply(lambda x: abs(x))'):
    s.apply(lambda x: abs(x))
with timed('[abs(x) for x in s]'):
    [abs(x) for x in s]

print(
    f'{len(s) = :,}',
    sep='\n',
)
import builtins

print(
    f'{builtins.abs        = }',
    f'{abs is builtins.abs = }',
    sep='\n',
)
import builtins
from functools import wraps
from inspect import isbuiltin
from logging import getLogger, basicConfig, INFO

logger = getLogger(__name__)
basicConfig(level=INFO)

# The lambda decorator receives the factory `abs(abs)` defined below. It looks
# up the builtin that shares the factory's name, passes that builtin to the
# factory, installs the returned wrapper on the `builtins` module, and finally
# evaluates to that same wrapper so the module-level name `abs` is rebound to
# it as well.
@lambda f: [setattr(builtins, f.__name__, rv := f(getattr(builtins, f.__name__))), rv][-1]
def abs(abs):
    # Inside the factory, `abs` is the parameter: the original builtin that is
    # being wrapped.
    @wraps(abs)
    def inner(*args, **kwargs):
        rv = abs(*args, **kwargs)
        # Plain (non-f) format string: the %r placeholders are filled in by
        # the logging machinery from the arguments that follow.
        logger.info('<wrapped abs>(*%r, **%r) ⇒ %r', args, kwargs, rv)
        return rv
    return inner

print(
    f'{abs(123) = }',
    f'{abs is builtins.abs = }',
    f'{isbuiltin(abs) = }',
    sep='\n',
)
import pandas; assert pandas.__version__ == '2.1.0'

from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed

rng = default_rng(0)

s = Series(
    index=(idx := date_range('2020-01-01', periods=90*24*60*60, freq='s')),
    data=rng.integers(-10, +10, size=len(idx))
)

with timed('abs(...)'):
    abs(s)
with timed('.apply(abs, by_row=False)'):
    s.apply(abs, by_row=False)
with timed('.apply("abs", by_row=False)'):
    s.apply("abs", by_row=False)
with timed('.apply(np_abs, by_row=False)'):
    s.apply(np_abs, by_row=False)
with timed('.apply(lambda x: abs(x), by_row=False)'):
    s.apply(lambda x: abs(x), by_row=False)
with timed('[abs(x) for x in s]'):
    [abs(x) for x in s]

print(
    f'{len(s) = :,}',
    sep='\n',
)
import pandas; assert pandas.__version__ == '2.0.3'

from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed

rng = default_rng(0)

s = Series(
    index=(idx := date_range('2020-01-01', periods=90*24*60*60, freq='s')),
    data=rng.integers(-10, +10, size=len(idx))
)

s.apply(abs, by_row=False)

print(
    f'{len(s) = :,}',
    sep='\n',
)
import pandas; assert pandas.__version__ == '2.0.3'

from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed

rng = default_rng(0)

s = Series(
    index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
    data=rng.integers(-10, +10, size=len(idx))
)

def f(x, *, mode=True, by_row=...):
    """Return ``x`` squared when ``mode`` is truthy, otherwise ``x`` cubed.

    ``by_row`` is accepted as a keyword but never read by the body.
    NOTE(review): presumably present to show a user function that has its
    own ``by_row`` keyword -- confirm intent.
    """
    if mode:
        return x ** 2
    return x ** 3

print(
    f'{len(s) = :,}',
    s.apply(f, mode=True),
    s.apply(f, mode=False),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import DataFrame, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=90*24*60*60, freq='s')),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
    },
)

with timed('abs(...)'):
    abs(df)
with timed('.apply(abs)'):
    df.apply(abs)
with timed('.apply("abs")'):
    df.apply("abs")
with timed('.apply(np_abs)'):
    df.apply(np_abs)
with timed('.apply(lambda x: abs(x))'):
    df.apply(lambda x: abs(x))
with timed('[abs(df[x]) for x in df]'):
    [abs(df[x]) for x in df]

print(
    f'{len(df) = :,}',
    sep='\n',
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import DataFrame, date_range
from numpy.random import default_rng
from numpy import empty_like
from _utils import timed

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=30*24*60*60, freq='s')),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
    },
)

def f(s):
    """Return an array shaped like ``s`` with each element squared or cubed.

    Elements greater than zero are squared; all others (zero and negative
    values) are cubed. The result buffer comes from ``empty_like(s)`` and is
    filled one position at a time by an explicit Python loop.
    """
    out = empty_like(s)
    for pos, value in enumerate(s):
        out[pos] = value ** 2 if value > 0 else value ** 3
    return out

with timed('.apply(f)'):
    df.apply(f)
with timed('.apply(lambda x: f(x))'):
    df.apply(lambda x: f(x))
# with timed(".apply(f, engine='numba')"):
#     df.apply(f, engine='numba')
# with timed(".apply(f, engine='numba', raw=True)"):
#     df.apply(f, engine='numba', raw=True)
# with timed(".apply(f, engine='numba', raw=True, engine_kwargs={'parallel': True})"):
#     df.apply(f, engine='numba', raw=True, engine_kwargs={'parallel': True})

print(
    f'{len(df) = :,}',
    sep='\n',
)

numeric_only

import pandas; assert pandas.__version__ == '1.5.3'

from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=7*24*60*60, freq='s')),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
        'c': rng.choice([*ascii_lowercase], size=len(idx)),
    },
)

# with timed('.sum()'):
#     df.sum()
with timed('.sum(numeric_only=True)'):
    df.sum(numeric_only=True)

print(
    f'{len(df) = :,}',
    # df.sum(),
    df.sum(numeric_only=True),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=7*24*60*60, freq='s')),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
        'c': rng.choice([*ascii_lowercase], size=len(idx)),
    },
)

with timed('.sum()'):
    df.sum()
with timed('.sum(numeric_only=True)'):
    df.sum(numeric_only=True)

print(
    f'{len(df) = :,}',
    # df.sum(),
    # df.sum(numeric_only=True),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
        'c': rng.choice([*ascii_lowercase], size=len(idx)),
    },
)

with timed('.mean()'):
    df.mean()

print(
    f'{len(df) = :,}',
    df.mean(),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '1.5.3'

from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
        'c': rng.choice([*ascii_lowercase], size=len(idx)),
    },
)

with timed('.mean()'):
    df.mean()

print(
    f'{len(df) = :,}',
    df.mean(),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
        'c': rng.choice([*ascii_lowercase], size=len(idx)),
    },
)

with timed('.mean(numeric_only=True)'):
    df.mean(numeric_only=True)

with timed('[[…]].mean()'):
    df[['a', 'b']].mean()

with timed(".select_dtypes(include='number').mean()"):
    df.select_dtypes(include='number').mean()

print(
    f'{len(df) = :,}',
    # df.mean(numeric_only=True),
    # df[['a', 'b']].mean(),
    # df.select_dtypes(include='number').mean(),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)

copy on write

import pandas; assert pandas.__version__ == '1.5.3'

from pandas import DataFrame, date_range
from numpy.random import default_rng

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
    },
)

# df.loc[lambda df_: df_['b'] < 0][lambda df_: df_['a'] % 2 == 0]['a'] = 0
# df.loc[lambda df_: (df_['b'] < 0) & (df_['a'] % 2 == 0)]['a'] = 0
# df.loc[lambda df_: (df_['b'] < 0) & (df_['a'] % 2 == 0), 'a'] = 0
df['a'].loc[(df['b'] < 0) & (df['a'] % 2 == 0)] = 0

print(
    # df,
    df[lambda df_: df_['b'] < 0][lambda df_: df_['a'] % 2 == 0]['a'],
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import DataFrame, date_range
from numpy.random import default_rng
from pandas import option_context
from contextlib import nullcontext

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
    },
)

with option_context('mode.copy_on_write', True):
    # df.loc[lambda df_: df_['b'] < 0][lambda df_: df_['a'] % 2 == 0]['a'] = 0
    # df.loc[lambda df_: (df_['b'] < 0) & (df_['a'] % 2 == 0)]['a'] = 0
    # df.loc[lambda df_: (df_['b'] < 0) & (df_['a'] % 2 == 0), 'a'] = 0
    df['a'].loc[(df['b'] < 0) & (df['a'] % 2 == 0)] = 0

print(
    df,
    df[lambda df_: df_['b'] < 0][lambda df_: df_['a'] % 2 == 0]['a'],
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '1.4.4'

from pandas import option_context

with option_context('mode.copy_on_write', True):
    pass
import pandas; assert pandas.__version__ == '1.5.3'

from pandas import option_context

with option_context('mode.copy_on_write', True):
    pass
from numpy.random import default_rng
from numpy import shares_memory

rng = default_rng(0)

xs = rng.integers(-10, +10, size=(3, 3))

print(
    xs,

    # xs[1:],
    # f'{shares_memory(xs[1:], xs) = }',

    # xs[:, 1:],
    # f'{shares_memory(xs[:, 1:], xs) = }',

    xs[xs > 0],
    f'{shares_memory(xs[xs > 0], xs) = }',

    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
from pandas import Series, date_range
from numpy.random import default_rng

rng = default_rng(0)

s0 = Series(
    index=(idx := date_range('2020-01-01', periods=90)),
    data=rng.integers(-10, +10, size=len(idx)),
)

s1 = s0.iloc[:2]
s2 = s0.loc[:'2020-02-01']
s3 = s0.loc[s0 > 0]
s4 = s0.sort_index()

print(
    f'{s0._is_view = }',
    f'{s1._is_view = }',
    f'{s2._is_view = }',
    f'{s3._is_view = }',
    f'{s4._is_view = }',
    sep='\n',
)
import pandas; assert pandas.__version__ == '1.5.3'

from pandas import DataFrame, date_range
from numpy.random import default_rng

rng = default_rng(0)

df0 = DataFrame(
    index=(idx := date_range('2020-01-01', periods=90)),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
    },
)

df1 = df0[['a', 'b']]
df2 = df0[:]
s0 = df0['a']

# df1.loc[:, 'a'] = 0
df2.loc[:, 'a'] = 0

print(
    f'{df0._is_view = }',
    f'{df1._is_view = }',
    f'{df2._is_view = }',
    f'{s0._is_view  = }',
    df0,
    sep='\n',
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import DataFrame, date_range, option_context
from numpy.random import default_rng

rng = default_rng(0)

df0 = DataFrame(
    index=(idx := date_range('2020-01-01', periods=90)),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.normal(size=len(idx)),
    },
)

with option_context('mode.copy_on_write', True):
    df1 = df0[['a', 'b']]
    df2 = df0[:]

    df1.loc[:, 'a'] = 0
    df2.loc[:, 'a'] = 0

print(
    f'{df0._is_view = }',
    f'{df1._is_view = }',
    f'{df2._is_view = }',
    df0,
    sep='\n',
)

inplace=True

import pandas; assert pandas.__version__ == '2.2.2'

from pandas import option_context, Series
from contextlib import nullcontext

with option_context('mode.copy_on_write', True):
    s = Series([1, 2, 3])
    s.to_numpy()[:] = 0
    print(s)
import pandas; assert pandas.__version__ == '1.5.3'

from pandas import Series, date_range

s = Series(index=date_range('2020-01-01', periods=90), data=0)
s.sort_index(inplace=True, ascending=False)

print(
    s,
    f'{s.index.is_monotonic_decreasing = }',
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import Series, date_range

s = Series(index=date_range('2020-01-01', periods=90), data=0)
s.sort_index(inplace=True, ascending=False)

print(
    s,
    f'{s.index.is_monotonic_decreasing = }',
)

Series.case_when

from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, select

rng = default_rng(0)

s = Series(
    index=(idx := date_range('2020-01-01', periods=90)),
    data=rng.normal(size=len(idx)),
)

print(
    s.case_when(caselist=[
        (s > 0, s ** 2),
        (s < 0, s ** 3),
    ]),
    # Series(
    #     index=s.index,
    #     data=where(s > 0, s ** 2, s ** 3),
    # ),
    # Series(
    #     index=s.index,
    #     data=select(
    #         [ s > 0,  s < 0],
    #         [s ** 2, s ** 3],
    #     ),
    # ),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)

Index dtypes

import pandas; assert pandas.__version__ == '1.3.5'

from pandas import Series
from numpy import array

s = Series(
    index=array([0, 1, 2], dtype='int8'),
    data=0,
)

print(
    f'{s.index.dtype = }',
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import Series
from numpy import array

s = Series(
    index=array([0, 1, 2], dtype='int8'),
    data=0,
)

print(
    f'{s.index.dtype = }',
)
import pandas; assert pandas.__version__ == '1.4.4'

from pandas import Series, to_datetime

s = Series(
    index=to_datetime(['2020-01-01']).astype('datetime64[s]'),
    data=0,
)

print(
    f'{s.index.dtype = }',
    sep='\n',
)
import pandas; assert pandas.__version__ == '1.5.3'

from pandas import Series, to_datetime

s = Series(
    index=to_datetime(['2020-01-01']).astype('datetime64[s]'),
    data=0,
)

print(
    f'{s.index.dtype = }',
    sep='\n',
)

Series.str & Series.dt

import pandas; assert pandas.__version__ == '1.3.5'

from pandas import Series

s0 = Series(['abc', 'def'])
s1 = Series(['abc', 'def'], dtype='string')
s2 = Series(['abc', 'def'], dtype='string[pyarrow]')

print(
    f'{s0.dtype = }',
    f'{s1.dtype = }',
    f'{s2.dtype = }',
    sep='\n',
)
import pandas; assert pandas.__version__ == '1.3.5'

from pandas import Series, date_range

s0 = Series(date_range('2020-01-01', periods=3), dtype='datetime64[ns]')
s1 = Series(date_range('2020-01-01', periods=3), dtype='timestamp[ns][pyarrow]')

print(
    f'{s0.dt.floor("D") = }',
    f'{s1.dt.floor("D") = }',
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.0.3'

from pandas import Series, date_range

s0 = Series(date_range('2020-01-01', periods=3), dtype='datetime64[ns]')
s1 = Series(date_range('2020-01-01', periods=3), dtype='timestamp[ns][pyarrow]')

print(
    f'{s0.dt.floor("D") = }',
    f'{s1.dt.floor("D") = }',
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)

Series.struct & Series.list

from pandas import Series, ArrowDtype
from pyarrow  import list_, int64

s0 = Series([[0, 1], [2, 3, 4]])
s1 = Series([[0, 1], [2, 3, 4]], dtype=ArrowDtype(list_(int64())))

print(
    f'{s0.dtype = }',
    f'{s1.dtype = }',
    # s0.list,
    # s1.list,
    s1.list.len(),
    s1.list.flatten(),
    sep='\n',
)
from pandas import Series, ArrowDtype
from pyarrow  import struct, string, int64

s0 = Series([
    {'name': 'abc', 'value': 123},
    {'name': 'def', 'value': 456},
])
s1 = Series([
    {'name': 'abc', 'value': 123},
    {'name': 'def', 'value': 456},
], dtype=ArrowDtype(
    struct([
        ('name', string()),
        ('value', int64()),
    ])
))

print(
    f'{s0.dtype = }',
    f'{s1.dtype = }',
    # s0.struct,
    # s1.struct,
    s1.struct.field('name'),
    sep='\n',
)

.groupby(group_keys=…)

import pandas; assert pandas.__version__ == '1.4.4'

from pandas import Series, date_range, MultiIndex, concat
from numpy.random import default_rng
from string import ascii_lowercase

rng = default_rng(0)

s = Series(
    index=(idx := MultiIndex.from_product([
        date_range('2020-01-01', periods=3),
        rng.choice([*ascii_lowercase], size=(3, 4)).view('<U4').ravel(),
    ], names='date category'.split())),
    data=0,
)

print(
    s.groupby('category').sum(),
    s.groupby('category').apply(lambda g: g),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '1.5.3'

from pandas import Series, date_range, MultiIndex, concat
from numpy.random import default_rng
from string import ascii_lowercase

rng = default_rng(0)

s = Series(
    index=(idx := MultiIndex.from_product([
        date_range('2020-01-01', periods=3),
        rng.choice([*ascii_lowercase], size=(3, 4)).view('<U4').ravel(),
    ], names='date category'.split())),
    data=0,
)

print(
    s.groupby('category').sum(),
    s.groupby('category').apply(lambda g: g),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'

from pandas import Series, date_range, MultiIndex, concat
from numpy.random import default_rng
from string import ascii_lowercase

rng = default_rng(0)

s = Series(
    index=(idx := MultiIndex.from_product([
        date_range('2020-01-01', periods=3),
        rng.choice([*ascii_lowercase], size=(3, 4)).view('<U4').ravel(),
    ], names='date category'.split())),
    data=0,
)

print(
    s.groupby('category').sum(),
    s.groupby('category').apply(lambda g: g),
    s.groupby('category', group_keys=False).apply(lambda g: g),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)

DataFrame.stack

from pandas import DataFrame, date_range
from numpy.random import default_rng

rng = default_rng(0)

df = DataFrame(
    index=(idx := date_range('2020-01-01', periods=3)),
    data={
        'a': rng.integers(-10, +10, size=len(idx)),
        'b': rng.integers(-10, +10, size=len(idx)),
    }
)
df.loc[df.sample(random_state=rng).index, :] = float('nan')

print(
    df,
    # df.stack(),
    # df.stack(dropna=False),
    df.stack(future_stack=True),
    sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)