Date: Friday, May 31st, 2024 at 09:30 AM US/Eastern
Topics: pandas 2.0, Python
Recently upgraded to the latest version of pandas?
The pandas 2 changelog consists of over 2000 lines of text, code, and bullet points. While the largest changes revolve around the introduction of the PyArrow backend, there are also a plethora of bug fixes, backwards-incompatible changes, deprecations, and much more to discuss. With all of the new features and updates, it is hard to stay up-to-date with pandas best practices without reading the changelog yourself.
Thankfully, we’ve done that reading for you and have distilled the most important updates and where they will impact your day-to-day work. Join us for “What’s New in pandas 2” to keep up with the best practices in the most veteran DataFrame library in the Python ecosystem.
print("Let's take a look!")
import pandas; assert pandas.__version__ == '1.3.5'
print("Let's take a look!")
import pandas; assert pandas.__version__ == '1.4.4'
print("Let's take a look!")
import pandas; assert pandas.__version__ == '1.5.3'
print("Let's take a look!")
import pandas; assert pandas.__version__ == '2.0.3'
print("Let's take a look!")
import pandas; assert pandas.__version__ == '2.1.4'
print("Let's take a look!")
import pandas; assert pandas.__version__ == '2.2.2'
print("Let's take a look!")
.is_monotonic ⇒ .is_monotonic_increasing, .is_monotonic_decreasing
Many deprecation warnings have now become errors.
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import Index
idx = Index([0, 10, 200, 3_000])
print(f'{idx.is_monotonic = }')
from pandas import Series
from numpy.random import default_rng
rng = default_rng(0)
s = Series(
# index=(idx := [0, 10, 200, 3_000]),
# index=(idx := [0, 10, 3_000, 200]),
# index=(idx := [0, 10, 200, 200, 3_000]),
index=(idx := [0, 10, 200, 3_000, 200]),
data=rng.integers(-10, +10, size=len(idx)),
)
assert s.index.is_monotonic_increasing
print(
s.loc[10:200]
)
import pandas; assert pandas.__version__ == '2.0.3'
from pandas import Index
idx = Index([0, 10, 200, 3_000])
print(
# f'{idx.is_monotonic = }',
f'{idx.is_monotonic_increasing = }',
f'{idx.is_monotonic_decreasing = }',
sep='\n',
)
.iteritems ⇒ .items
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import Series, DataFrame, date_range
from numpy.random import default_rng
rng = default_rng(0)
s = Series(
index=(idx := date_range('2020-01-01', periods=3)),
data=rng.integers(-10, +10, size=len(idx)),
)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=3)),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.integers(-10, +10, size=len(idx)),
}
)
print(
*s.iteritems(),
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
print(
*df.iteritems(),
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import Series, DataFrame, date_range
from numpy.random import default_rng
rng = default_rng(0)
s = Series(
index=(idx := date_range('2020-01-01', periods=3)),
data=rng.integers(-10, +10, size=len(idx)),
)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=3)),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.integers(-10, +10, size=len(idx)),
}
)
print(
# *s.iteritems(),
*s.items(),
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
print(
# *df.iteritems(),
*df.items(),
sep='\n',
end='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import Series, DataFrame, date_range
from numpy.random import default_rng
from warnings import catch_warnings, simplefilter
rng = default_rng(0)
s0 = Series(
index=(idx := date_range('2020-01-01', periods=3)),
data=rng.integers(-10, +10, size=len(idx)),
)
s1 = Series(
index=(idx := date_range('2020-01-02', periods=3)),
data=rng.integers(-10, +10, size=len(idx)),
)
# Walk s0 label by label and fold a power of each value into the matching
# entry of s1. All warnings raised inside the block are ignored
# (presumably to hide the deprecation warning from ``.iteritems()`` on this
# pandas version — confirm).
with catch_warnings():
    simplefilter('ignore')
    for k, v in s0.iteritems():
        # Labels that s1 does not carry are skipped.
        if k not in s1: continue
        # Even values contribute their square, odd values their cube.
        if v % 2 == 0:
            s1.loc[k] += v ** 2
        else:
            s1.loc[k] += v ** 3
print(
s0,
s1,
sep='\n',
)
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where
rng = default_rng(0)
s0 = Series(
index=(idx := date_range('2020-01-01', periods=3)),
data=rng.integers(-10, +10, size=len(idx)),
)
s1 = Series(
index=(idx := date_range('2020-01-02', periods=3)),
data=rng.integers(-10, +10, size=len(idx)),
)
idx = s0.index.intersection(s1.index)
s1.loc[idx] += Series(
index=s0.index,
data=where(s0 % 2 == 0, s0 ** 2, s0 ** 3),
).loc[idx]
print(
s0,
s1,
sep='\n',
)
.apply
from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed
rng = default_rng(0)
s = Series(
index=(idx := date_range('2020-01-01', periods=90*24*60*60, freq='s')),
data=rng.integers(-10, +10, size=len(idx))
)
# Run several spellings of "absolute value of every element of s", each
# inside its own ``timed`` block labelled with the expression being run.
# NOTE(review): ``timed`` is imported from the local ``_utils`` module, which
# is not visible here; presumably a context manager that reports elapsed
# time under the given label — confirm.
with timed('abs(...)'):
    abs(s)

with timed('.apply(abs)'):
    s.apply(abs)

with timed('.apply("abs")'):
    s.apply("abs")

with timed('.apply(np_abs)'):
    s.apply(np_abs)

with timed('.apply(lambda x: abs(x))'):
    s.apply(lambda x: abs(x))

with timed('[abs(x) for x in s]'):
    [abs(x) for x in s]
print(
f'{len(s) = :,}',
sep='\n',
)
import builtins
print(
f'{builtins.abs = }',
f'{abs is builtins.abs = }',
sep='\n',
)
import builtins
from functools import wraps
from inspect import isbuiltin
from logging import getLogger, basicConfig, INFO
logger = getLogger(__name__)
basicConfig(level=INFO)


# Decorator trick: the lambda receives the factory defined below, calls it
# with the object currently stored as ``builtins.<factory name>``, writes the
# result back onto the ``builtins`` module, and returns that same result so it
# is also bound to the module-level name. The ``[..., rv][-1]`` list only
# exists to sequence the ``setattr`` side effect before yielding ``rv``.
@lambda f: [setattr(builtins, f.__name__, rv := f(getattr(builtins, f.__name__))), rv][-1]
def abs(abs):
    """Return a wrapper around the callable passed in as ``abs``.

    The wrapper forwards all positional and keyword arguments to the
    wrapped callable, logs the call and its result at INFO level, and
    returns the result unchanged. ``functools.wraps`` copies the wrapped
    callable's metadata onto the wrapper.
    """
    @wraps(abs)
    def inner(*args, **kwargs):
        rv = abs(*args, **kwargs)
        # Plain format string with lazy %-arguments; no f-string is needed
        # because the literal contains no replacement fields.
        logger.info('<wrapped abs>(*%r, **%r) ⇒ %r', args, kwargs, rv)
        return rv
    return inner
print(
f'{abs(123) = }',
f'{abs is builtins.abs = }',
f'{isbuiltin(abs) = }',
sep='\n',
)
import pandas; assert pandas.__version__ == '2.1.0'
from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed
rng = default_rng(0)
s = Series(
index=(idx := date_range('2020-01-01', periods=90*24*60*60, freq='s')),
data=rng.integers(-10, +10, size=len(idx))
)
# Same comparison of absolute-value spellings on s, this time passing
# ``by_row=False`` to every ``Series.apply`` call.
# NOTE(review): ``timed`` is imported from the local ``_utils`` module, which
# is not visible here; presumably a context manager that reports elapsed
# time under the given label — confirm.
with timed('abs(...)'):
    abs(s)

with timed('.apply(abs, by_row=False)'):
    s.apply(abs, by_row=False)

with timed('.apply("abs", by_row=False)'):
    s.apply("abs", by_row=False)

with timed('.apply(np_abs, by_row=False)'):
    s.apply(np_abs, by_row=False)

with timed('.apply(lambda x: abs(x), by_row=False)'):
    s.apply(lambda x: abs(x), by_row=False)

with timed('[abs(x) for x in s]'):
    [abs(x) for x in s]
print(
f'{len(s) = :,}',
sep='\n',
)
import pandas; assert pandas.__version__ == '2.0.3'
from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed
rng = default_rng(0)
s = Series(
index=(idx := date_range('2020-01-01', periods=90*24*60*60, freq='s')),
data=rng.integers(-10, +10, size=len(idx))
)
s.apply(abs, by_row=False)
print(
f'{len(s) = :,}',
sep='\n',
)
import pandas; assert pandas.__version__ == '2.0.3'
from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed
rng = default_rng(0)
s = Series(
index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
data=rng.integers(-10, +10, size=len(idx))
)
def f(x, *, mode=True, by_row=...):
    """Return ``x`` squared when ``mode`` is truthy, otherwise ``x`` cubed.

    ``by_row`` is accepted as a keyword but never read by this function.
    NOTE(review): presumably it is declared so that the name ``by_row`` is a
    keyword of the applied function rather than of ``Series.apply`` on this
    pandas version — confirm against the surrounding narrative.
    """
    return x**2 if mode else x**3
print(
f'{len(s) = :,}',
s.apply(f, mode=True),
s.apply(f, mode=False),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import DataFrame, date_range
from numpy.random import default_rng
from numpy import where, abs as np_abs
from _utils import timed
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=90*24*60*60, freq='s')),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
},
)
# Run several spellings of "absolute value of every element of df", each
# inside its own ``timed`` block labelled with the expression being run.
# NOTE(review): ``timed`` is imported from the local ``_utils`` module, which
# is not visible here; presumably a context manager that reports elapsed
# time under the given label — confirm.
with timed('abs(...)'):
    abs(df)

with timed('.apply(abs)'):
    df.apply(abs)

with timed('.apply("abs")'):
    df.apply("abs")

with timed('.apply(np_abs)'):
    df.apply(np_abs)

with timed('.apply(lambda x: abs(x))'):
    df.apply(lambda x: abs(x))

# Column-by-column equivalent written as a plain comprehension.
with timed('[abs(df[x]) for x in df]'):
    [abs(df[x]) for x in df]
print(
f'{len(df) = :,}',
sep='\n',
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import DataFrame, date_range
from numpy.random import default_rng
from numpy import empty_like
from _utils import timed
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=30*24*60*60, freq='s')),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
},
)
def f(s):
    """Return an array with each element of ``s`` squared or cubed.

    Strictly positive elements are squared; all other elements (zero and
    negatives) are cubed. The result is allocated with ``empty_like(s)``
    and filled position by position in an explicit Python loop.
    """
    rv = empty_like(s)
    # ``i`` is a positional offset into ``rv``; every slot is overwritten,
    # so the uninitialised contents of ``empty_like`` never leak out.
    for i, x in enumerate(s):
        if x > 0:
            rv[i] = x ** 2
        else:
            rv[i] = x ** 3
    return rv
# Apply ``f`` to df directly and through a lambda, each inside its own
# ``timed`` block.
# NOTE(review): ``timed`` is imported from the local ``_utils`` module, which
# is not visible here; presumably a context manager that reports elapsed
# time under the given label — confirm.
with timed('.apply(f)'):
    df.apply(f)

with timed('.apply(lambda x: f(x))'):
    df.apply(lambda x: f(x))
# with timed(".apply(f, engine='numba')"):
# df.apply(f, engine='numba')
# with timed(".apply(f, engine='numba', raw=True)"):
# df.apply(f, engine='numba', raw=True)
# with timed(".apply(f, engine='numba', raw=True, engine_kwargs={'parallel': True})"):
# df.apply(f, engine='numba', raw=True, engine_kwargs={'parallel': True})
print(
f'{len(df) = :,}',
sep='\n',
)
numeric_only
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=7*24*60*60, freq='s')),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
'c': rng.choice([*ascii_lowercase], size=len(idx)),
},
)
# with timed('.sum()'):
# df.sum()
# Sum only the numeric columns of df inside a labelled ``timed`` block.
# NOTE(review): ``timed`` comes from the local ``_utils`` module (not visible
# here); presumably it reports elapsed time under the label — confirm.
with timed('.sum(numeric_only=True)'):
    df.sum(numeric_only=True)
print(
f'{len(df) = :,}',
# df.sum(),
df.sum(numeric_only=True),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=7*24*60*60, freq='s')),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
'c': rng.choice([*ascii_lowercase], size=len(idx)),
},
)
# Sum df with and without ``numeric_only=True``, each call inside its own
# labelled ``timed`` block.
# NOTE(review): ``timed`` comes from the local ``_utils`` module (not visible
# here); presumably it reports elapsed time under the label — confirm.
with timed('.sum()'):
    df.sum()

with timed('.sum(numeric_only=True)'):
    df.sum(numeric_only=True)
print(
f'{len(df) = :,}',
# df.sum(),
# df.sum(numeric_only=True),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
'c': rng.choice([*ascii_lowercase], size=len(idx)),
},
)
# Call ``df.mean()`` with default arguments inside a labelled ``timed``
# block; df also carries the string column 'c'.
# NOTE(review): ``timed`` comes from the local ``_utils`` module (not visible
# here); presumably it reports elapsed time under the label — confirm.
with timed('.mean()'):
    df.mean()
print(
f'{len(df) = :,}',
df.mean(),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
'c': rng.choice([*ascii_lowercase], size=len(idx)),
},
)
# Call ``df.mean()`` with default arguments inside a labelled ``timed``
# block; df also carries the string column 'c'.
# NOTE(review): ``timed`` comes from the local ``_utils`` module (not visible
# here); presumably it reports elapsed time under the label — confirm.
with timed('.mean()'):
    df.mean()
print(
f'{len(df) = :,}',
df.mean(),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import DataFrame, date_range
from numpy.random import default_rng
from string import ascii_lowercase
from _utils import timed
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
'c': rng.choice([*ascii_lowercase], size=len(idx)),
},
)
# Three ways of averaging only the numeric columns of df, each inside its
# own labelled ``timed`` block.
# NOTE(review): ``timed`` comes from the local ``_utils`` module (not visible
# here); presumably it reports elapsed time under the label — confirm.

# 1. Let the reduction drop non-numeric columns itself.
with timed('.mean(numeric_only=True)'):
    df.mean(numeric_only=True)

# 2. Select the numeric columns by name first.
with timed('[[…]].mean()'):
    df[['a', 'b']].mean()

# 3. Select the numeric columns by dtype first.
with timed(".select_dtypes(include='number').mean()"):
    df.select_dtypes(include='number').mean()
print(
f'{len(df) = :,}',
# df.mean(numeric_only=True),
# df[['a', 'b']].mean(),
# df.select_dtypes(include='number').mean(),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import DataFrame, date_range
from numpy.random import default_rng
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
},
)
# df.loc[lambda df_: df_['b'] < 0][lambda df_: df_['a'] % 2 == 0]['a'] = 0
# df.loc[lambda df_: (df_['b'] < 0) & (df_['a'] % 2 == 0)]['a'] = 0
# df.loc[lambda df_: (df_['b'] < 0) & (df_['a'] % 2 == 0), 'a'] = 0
df['a'].loc[(df['b'] < 0) & (df['a'] % 2 == 0)] = 0
print(
# df,
df[lambda df_: df_['b'] < 0][lambda df_: df_['a'] % 2 == 0]['a'],
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import DataFrame, date_range
from numpy.random import default_rng
from pandas import option_context
from contextlib import nullcontext
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=24*60*60, freq='s')),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
},
)
# With the 'mode.copy_on_write' option switched on, assign 0 through an
# intermediate ``df['a']`` selection for the rows where 'b' is negative and
# 'a' is even. The commented lines are alternative spellings of the same
# update kept for comparison.
with option_context('mode.copy_on_write', True):
    # df.loc[lambda df_: df_['b'] < 0][lambda df_: df_['a'] % 2 == 0]['a'] = 0
    # df.loc[lambda df_: (df_['b'] < 0) & (df_['a'] % 2 == 0)]['a'] = 0
    # df.loc[lambda df_: (df_['b'] < 0) & (df_['a'] % 2 == 0), 'a'] = 0
    df['a'].loc[(df['b'] < 0) & (df['a'] % 2 == 0)] = 0
print(
df,
df[lambda df_: df_['b'] < 0][lambda df_: df_['a'] % 2 == 0]['a'],
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '1.4.4'
from pandas import option_context
# Enter and leave a context that sets 'mode.copy_on_write' to True; the
# body is intentionally empty.
# NOTE(review): presumably this only probes whether the pinned pandas version
# recognises the option — confirm.
with option_context('mode.copy_on_write', True):
    pass
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import option_context
# Enter and leave a context that sets 'mode.copy_on_write' to True; the
# body is intentionally empty.
# NOTE(review): presumably this only probes whether the pinned pandas version
# recognises the option — confirm.
with option_context('mode.copy_on_write', True):
    pass
from numpy.random import default_rng
from numpy import shares_memory
rng = default_rng(0)
xs = rng.integers(-10, +10, size=(3, 3))
print(
xs,
# xs[1:],
# f'{shares_memory(xs[1:], xs) = }',
# xs[:, 1:],
# f'{shares_memory(xs[:, 1:], xs) = }',
xs[xs > 0],
f'{shares_memory(xs[xs > 0], xs) = }',
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
from pandas import Series, date_range
from numpy.random import default_rng
rng = default_rng(0)
s0 = Series(
index=(idx := date_range('2020-01-01', periods=90)),
data=rng.integers(-10, +10, size=len(idx)),
)
s1 = s0.iloc[:2]
s2 = s0.loc[:'2020-02-01']
s3 = s0.loc[s0 > 0]
s4 = s0.sort_index()
print(
f'{s0._is_view = }',
f'{s1._is_view = }',
f'{s2._is_view = }',
f'{s3._is_view = }',
f'{s4._is_view = }',
sep='\n',
)
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import DataFrame, date_range
from numpy.random import default_rng
rng = default_rng(0)
df0 = DataFrame(
index=(idx := date_range('2020-01-01', periods=90)),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
},
)
df1 = df0[['a', 'b']]
df2 = df0[:]
s0 = df0['a']
# df1.loc[:, 'a'] = 0
df2.loc[:, 'a'] = 0
print(
f'{df0._is_view = }',
f'{df1._is_view = }',
f'{df2._is_view = }',
f'{s0._is_view = }',
df0,
sep='\n',
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import DataFrame, date_range, option_context
from numpy.random import default_rng
rng = default_rng(0)
df0 = DataFrame(
index=(idx := date_range('2020-01-01', periods=90)),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.normal(size=len(idx)),
},
)
# With 'mode.copy_on_write' switched on, derive two frames from df0 (a
# column selection and a full slice) and overwrite column 'a' in each.
# NOTE(review): the original indentation was lost; all four statements are
# assumed to belong to the ``with`` suite — confirm against the source
# notebook.
with option_context('mode.copy_on_write', True):
    df1 = df0[['a', 'b']]
    df2 = df0[:]

    df1.loc[:, 'a'] = 0
    df2.loc[:, 'a'] = 0
print(
f'{df0._is_view = }',
f'{df1._is_view = }',
f'{df2._is_view = }',
df0,
sep='\n',
)
inplace=True
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import option_context, Series
from contextlib import nullcontext
# With 'mode.copy_on_write' switched on, build a small Series and try to
# zero it by writing through the array returned by ``to_numpy()``.
# NOTE(review): the original indentation was lost; both statements are
# assumed to belong to the ``with`` suite. Under copy-on-write the returned
# array is presumably read-only, so the write is expected to fail — confirm.
with option_context('mode.copy_on_write', True):
    s = Series([1, 2, 3])
    s.to_numpy()[:] = 0
print(s)
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import Series, date_range
s = Series(index=date_range('2020-01-01', periods=90), data=0)
s.sort_index(inplace=True, ascending=False)
print(
s,
f'{s.index.is_monotonic_decreasing = }',
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import Series, date_range
s = Series(index=date_range('2020-01-01', periods=90), data=0)
s.sort_index(inplace=True, ascending=False)
print(
s,
f'{s.index.is_monotonic_decreasing = }',
)
Series.case_when
from pandas import Series, date_range
from numpy.random import default_rng
from numpy import where, select
rng = default_rng(0)
s = Series(
index=(idx := date_range('2020-01-01', periods=90)),
data=rng.normal(size=len(idx)),
)
print(
s.case_when(caselist=[
(s > 0, s ** 2),
(s < 0, s ** 3),
]),
# Series(
# index=s.index,
# data=where(s > 0, s ** 2, s ** 3),
# ),
# Series(
# index=s.index,
# data=select(
# [ s > 0, s < 0],
# [s ** 2, s ** 3],
# ),
# ),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
Index dtypes
import pandas; assert pandas.__version__ == '1.3.5'
from pandas import Series
from numpy import array
s = Series(
index=array([0, 1, 2], dtype='int8'),
data=0,
)
print(
f'{s.index.dtype = }',
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import Series
from numpy import array
s = Series(
index=array([0, 1, 2], dtype='int8'),
data=0,
)
print(
f'{s.index.dtype = }',
)
import pandas; assert pandas.__version__ == '1.4.4'
from pandas import Series, to_datetime
s = Series(
index=to_datetime(['2020-01-01']).astype('datetime64[s]'),
data=0,
)
print(
f'{s.index.dtype = }',
sep='\n',
)
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import Series, to_datetime
s = Series(
index=to_datetime(['2020-01-01']).astype('datetime64[s]'),
data=0,
)
print(
f'{s.index.dtype = }',
sep='\n',
)
Series.str & Series.dt
import pandas; assert pandas.__version__ == '1.3.5'
from pandas import Series
s0 = Series(['abc', 'def'])
s1 = Series(['abc', 'def'], dtype='string')
s2 = Series(['abc', 'def'], dtype='string[pyarrow]')
print(
f'{s0.dtype = }',
f'{s1.dtype = }',
f'{s2.dtype = }',
sep='\n',
)
import pandas; assert pandas.__version__ == '1.3.5'
from pandas import Series, date_range
s0 = Series(date_range('2020-01-01', periods=3), dtype='datetime64[ns]')
s1 = Series(date_range('2020-01-01', periods=3), dtype='timestamp[ns][pyarrow]')
print(
f'{s0.dt.floor("D") = }',
f'{s1.dt.floor("D") = }',
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.0.3'
from pandas import Series, date_range
s0 = Series(date_range('2020-01-01', periods=3), dtype='datetime64[ns]')
s1 = Series(date_range('2020-01-01', periods=3), dtype='timestamp[ns][pyarrow]')
print(
f'{s0.dt.floor("D") = }',
f'{s1.dt.floor("D") = }',
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
Series.struct & Series.list
from pandas import Series, ArrowDtype
from pyarrow import list_, int64
s0 = Series([[0, 1], [2, 3, 4]])
s1 = Series([[0, 1], [2, 3, 4]], dtype=ArrowDtype(list_(int64())))
print(
f'{s0.dtype = }',
f'{s1.dtype = }',
# s0.list,
# s1.list,
s1.list.len(),
s1.list.flatten(),
sep='\n',
)
from pandas import Series, ArrowDtype
from pyarrow import struct, string, int64
s0 = Series([
{'name': 'abc', 'value': 123},
{'name': 'def', 'value': 456},
])
s1 = Series([
{'name': 'abc', 'value': 123},
{'name': 'def', 'value': 456},
], dtype=ArrowDtype(
struct([
('name', string()),
('value', int64()),
])
))
print(
f'{s0.dtype = }',
f'{s1.dtype = }',
# s0.struct,
# s1.struct,
s1.struct.field('name'),
sep='\n',
)
.groupby(group_keys=…)
import pandas; assert pandas.__version__ == '1.4.4'
from pandas import Series, date_range, MultiIndex, concat
from numpy.random import default_rng
from string import ascii_lowercase
rng = default_rng(0)
s = Series(
index=(idx := MultiIndex.from_product([
date_range('2020-01-01', periods=3),
rng.choice([*ascii_lowercase], size=(3, 4)).view('<U4').ravel(),
], names='date category'.split())),
data=0,
)
print(
s.groupby('category').sum(),
s.groupby('category').apply(lambda g: g),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '1.5.3'
from pandas import Series, date_range, MultiIndex, concat
from numpy.random import default_rng
from string import ascii_lowercase
rng = default_rng(0)
s = Series(
index=(idx := MultiIndex.from_product([
date_range('2020-01-01', periods=3),
rng.choice([*ascii_lowercase], size=(3, 4)).view('<U4').ravel(),
], names='date category'.split())),
data=0,
)
print(
s.groupby('category').sum(),
s.groupby('category').apply(lambda g: g),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
import pandas; assert pandas.__version__ == '2.2.2'
from pandas import Series, date_range, MultiIndex, concat
from numpy.random import default_rng
from string import ascii_lowercase
rng = default_rng(0)
s = Series(
index=(idx := MultiIndex.from_product([
date_range('2020-01-01', periods=3),
rng.choice([*ascii_lowercase], size=(3, 4)).view('<U4').ravel(),
], names='date category'.split())),
data=0,
)
print(
s.groupby('category').sum(),
s.groupby('category').apply(lambda g: g),
s.groupby('category', group_keys=False).apply(lambda g: g),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40)
)
DataFrame.stack
from pandas import DataFrame, date_range
from numpy.random import default_rng
rng = default_rng(0)
df = DataFrame(
index=(idx := date_range('2020-01-01', periods=3)),
data={
'a': rng.integers(-10, +10, size=len(idx)),
'b': rng.integers(-10, +10, size=len(idx)),
}
)
df.loc[df.sample(random_state=rng).index, :] = float('nan')
print(
df,
# df.stack(),
# df.stack(dropna=False),
df.stack(future_stack=True),
sep='\n{}\n'.format('\N{box drawings light horizontal}' * 40),
)