Forward- or back-fill NA values in DataFrames
Back-filling replaces NaN with the next non-NA value, while forward-filling takes the previous non-NA value.
In pandas, this can be be done with fillna(method=bfill|ffill)
or with the shortcuts bfill()
and ffill()
# Import libraries
import pandas as pd
import numpy as np
from numpy.random import randint
# Set seed
np.random.seed(21)
# Create sample data
df = (
pd.DataFrame({
'group': list('ABCDE'),
'value': [randint(1000, 20000, randint(2, 5)) for i in range(5)]
})
.explode('value')
.reset_index(drop=True)
)
df.loc[df.sample(frac=0.5).index, 'value'] = np.nan
df
|
group |
value |
0 |
A |
6327 |
1 |
A |
NaN |
2 |
A |
NaN |
3 |
B |
NaN |
4 |
B |
13898 |
5 |
C |
NaN |
6 |
C |
18224 |
7 |
C |
2646 |
8 |
D |
NaN |
9 |
D |
16613 |
10 |
E |
17118 |
11 |
E |
NaN |
12 |
E |
3352 |
Back-fill and forward-fill
# Back-fill and forward-fill
df.assign(
value_bfill=df['value'].bfill(),
value_ffill=df['value'].ffill()
)
|
group |
value |
value_bfill |
value_ffill |
0 |
A |
6327 |
6327 |
6327 |
1 |
A |
NaN |
13898 |
6327 |
2 |
A |
NaN |
13898 |
6327 |
3 |
B |
NaN |
13898 |
6327 |
4 |
B |
13898 |
13898 |
13898 |
5 |
C |
NaN |
18224 |
13898 |
6 |
C |
18224 |
18224 |
18224 |
7 |
C |
2646 |
2646 |
2646 |
8 |
D |
NaN |
16613 |
2646 |
9 |
D |
16613 |
16613 |
16613 |
10 |
E |
17118 |
17118 |
17118 |
11 |
E |
NaN |
3352 |
17118 |
12 |
E |
3352 |
3352 |
3352 |
Forward- or back-fill within groups
# Backfill and foward-fill within each group
df.assign(
value_bfill=df.groupby('group').transform(lambda x: x.bfill()), # Back-fill
value_ffill=df.groupby('group').transform(lambda x: x.ffill()), # Forward-fill
value_filled=lambda x: x['value_bfill'].fillna(x['value_ffill']) # Get back- or forward-filled value
)
|
group |
value |
value_bfill |
value_ffill |
value_filled |
0 |
A |
6327 |
6327.0 |
6327.0 |
6327.0 |
1 |
A |
NaN |
NaN |
6327.0 |
6327.0 |
2 |
A |
NaN |
NaN |
6327.0 |
6327.0 |
3 |
B |
NaN |
13898.0 |
NaN |
13898.0 |
4 |
B |
13898 |
13898.0 |
13898.0 |
13898.0 |
5 |
C |
NaN |
18224.0 |
NaN |
18224.0 |
6 |
C |
18224 |
18224.0 |
18224.0 |
18224.0 |
7 |
C |
2646 |
2646.0 |
2646.0 |
2646.0 |
8 |
D |
NaN |
16613.0 |
NaN |
16613.0 |
9 |
D |
16613 |
16613.0 |
16613.0 |
16613.0 |
10 |
E |
17118 |
17118.0 |
17118.0 |
17118.0 |
11 |
E |
NaN |
3352.0 |
17118.0 |
3352.0 |
12 |
E |
3352 |
3352.0 |
3352.0 |
3352.0 |