# Import libraries
import pandas as pd
import numpy as np
# Build a small example frame: five labelled rows, with missing values
# (NaN) scattered across both columns.
df = pd.DataFrame(
    data={
        "population": [2148000, np.nan, 861635, 232741, np.nan],
        "area_km": [105.4, 47.87, 240.6, np.nan, 78.26],
    },
    index=["Paris", "Lyon", "Marseille", "Lille", "Strasbourg"],
)
df
| population | area_km |
Paris | 2148000.0 | 105.40 |
Lyon | NaN | 47.87 |
Marseille | 861635.0 | 240.60 |
Lille | 232741.0 | NaN |
Strasbourg | NaN | 78.26 |
Select rows that have at least one NA value
# Standard way: index the frame with a boolean mask that is True for
# every row holding at least one missing value.
df[df.isnull().any(axis="columns")]
| population | area_km |
Lyon | NaN | 47.87 |
Lille | 232741.0 | NaN |
Strasbourg | NaN | 78.26 |
# Same selection through .loc with a callable: the lambda receives the
# frame being indexed, so this works in the middle of a method chain.
df.loc[lambda frame: frame.isnull().any(axis="columns")]
| population | area_km |
Lyon | NaN | 47.87 |
Lille | 232741.0 | NaN |
Strasbourg | NaN | 78.26 |
Filter out rows with NA values
# Keep only complete rows: drop every row containing at least one NA value
df.dropna(axis="index", how="any")
| population | area_km |
Paris | 2148000.0 | 105.4 |
Marseille | 861635.0 | 240.6 |