Appendix II: Python

This appendix collects Python code equivalents for nearly all of the R commands shown in this book. The same code is reproduced, with full output, on the marginaleffects.com website. The small number of features that are not yet available in the Python version of marginaleffects are listed in the roadmap at the end of this appendix.

Note that, by default, marginaleffects commands return data frames in Polars format. These objects are easy to convert to pandas, NumPy, or other formats with the appropriate .to_*() method. For example, the get_dataset() function returns a Polars data frame that we can convert to pandas with .to_pandas(). Recall that Python uses 0-based indexing, whereas R uses 1-based indexing.

import polars as pl
import numpy as np
from marginaleffects import *
from plotnine import *
from scipy.stats import norm
from statsmodels.formula.api import logit, ols

dat = get_dataset("thornton")
dat = dat.to_pandas()
dat.head()
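
For instance, after conversion to pandas, the first row of the data is retrieved at position 0; in R it would be position 1:

dat.iloc[0]  # first row: position 0 in Python, 1 in R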

1. Who is this book for?

Data

dat = get_dataset("Titanic", "Stat2Data")
dat[:6, ["Name", "Survived", "Age"]]
get_dataset(search = "Titanic") 
get_dataset("Titanic", "Stat2Data", docs = True)

3. Conceptual framework

Predictors

rng = np.random.default_rng(seed=48103)
N = 10
dat = pl.DataFrame({
  "Num": rng.normal(loc = 0, scale = 1, size = N),
  "Bin": rng.binomial(n = 1, p = 0.5, size = N),
  "Cat": rng.choice(["A","B","C"], size = N)
})

Empirical grid

dat

Interesting grid

datagrid(Bin=[0,1], newdata = dat)
datagrid(
  Num=[dat["Num"].max(), dat["Num"].min()], 
  Bin=dat["Bin"].mean(),
  Cat=dat["Cat"].unique(),
  newdata = dat)

Representative grid

datagrid(grid_type = "mean_or_mode", newdata = dat)

Balanced grid

datagrid(grid_type = "balanced", newdata = dat)

Counterfactual grid

g = datagrid(
  Bin=[0,1],
  grid_type = "counterfactual",
  newdata = dat
)
g.shape
g.filter(pl.col("rowidcf")<3)

4. Hypothesis and equivalence tests

dat = get_dataset("thornton")
dat.head(6)
mod = ols("outcome ~ agecat - 1",
  data=dat.to_pandas()).fit()
mod.params
dat.group_by("agecat").agg(pl.col("outcome").mean())

Null hypothesis

Choice of null hypothesis

hypotheses(mod, hypothesis = 0.5)
# Manual replication: z statistic and two-sided p-value
b = mod.params.iloc[0]
se = np.sqrt(np.diag(mod.cov_params()))[0]
z = (b - 0.5) / se
2 * norm.cdf(-np.abs(z))

Linear and non-linear hypothesis tests

hypotheses(mod, hypothesis = "b2 - b0 = 0")
hypotheses(mod, hypothesis = "b2 / b0 = 1")

import numpy as np
hypotheses(mod, hypothesis = "b1**2 * np.exp(b0) = 0")
hypotheses(mod, hypothesis = "b0 - (b1 * b2) = 2")

hypotheses(mod, hypothesis = "difference ~ reference")
hypotheses(mod, hypothesis = "ratio ~ sequential")

Equivalence

hypotheses(mod, hypothesis = "b2 - b1 = 0")
hypotheses(mod, hypothesis = "b2 - b1 = 0",
  equivalence = [-0.05, 0.05])["p_value_equiv"]
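
To see what this equivalence p-value represents, here is a minimal manual sketch of the two one-sided tests (TOST), assuming a normal approximation and taking the larger of the two one-sided p-values; the standard error of the difference comes from the coefficient covariance matrix:

# Manual TOST sketch (normal approximation; for illustration only)
V = mod.cov_params().to_numpy()
est = mod.params.iloc[2] - mod.params.iloc[1]
se = np.sqrt(V[2, 2] + V[1, 1] - 2 * V[1, 2])
p_lower = norm.sf((est - (-0.05)) / se)  # H0: difference <= -0.05
p_upper = norm.cdf((est - 0.05) / se)    # H0: difference >= +0.05
max(p_lower, p_upper)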

5. Predictions

Quantity

dat = get_dataset("thornton")
dat = dat.drop_nulls(subset = ["incentive"])
mod = logit("outcome ~ incentive + agecat",
  data=dat.to_pandas()).fit()
b = mod.params

# Linear predictor: treated respondent aged 18 to 35
linpred_treatment_younger = b.iloc[0] + b.iloc[3] * 1 + \
  b.iloc[1] * 1 + b.iloc[2] * 0
# Linear predictor: untreated respondent older than 35
linpred_control_older = b.iloc[0] + b.iloc[3] * 0 + \
  b.iloc[1] * 0 + b.iloc[2] * 1

# Inverse link: map linear predictors to predicted probabilities
logistic = lambda x: 1 / (1 + np.exp(-x))
logistic(linpred_treatment_younger)
logistic(linpred_control_older)

grid = pl.DataFrame({
    "agecat": ["18 to 35", ">35"],
    "incentive": [1, 0]
})

predictions(mod, newdata = grid)

Predictors

mod = logit("outcome ~ incentive + agecat + distance",
  data=dat.to_pandas()).fit()

Empirical grid

p = predictions(mod)
p.shape
p.columns
p[:4, "estimate"]

Interesting grid

datagrid(agecat = "18 to 35", incentive = [0,1], model = mod)
predictions(mod,
  newdata=datagrid(agecat = "18 to 35", incentive = [0,1]))
predictions(mod,
  newdata=datagrid(
    distance = 2,
    agecat = dat["agecat"].unique(),
    incentive = dat["incentive"].max()))

Representative grid

predictions(mod, newdata="mean")

Balanced grid

predictions(mod, 
  newdata = datagrid(
    agecat = dat["agecat"].unique(), 
    incentive = dat["incentive"].unique(), 
    distance = dat["distance"].mean()))
predictions(mod, newdata = "balanced") 

Counterfactual grid

p = predictions(mod, variables = {"incentive": [0, 1]})
p.shape
p = pl.DataFrame({
    "Control": p.filter(pl.col("incentive") == 0)["estimate"],
    "Treatment": p.filter(pl.col("incentive") == 1)["estimate"]
})
plot = (
  ggplot(p, aes(x="Control", y="Treatment")) +
  geom_point() +
  geom_abline(
    intercept=0, slope=1, linetype="--", color="grey") +
  labs(
    x="Pr(outcome=1) when incentive = 0",
    y="Pr(outcome=1) when incentive = 1") +
  xlim(0, 1) +
  ylim(0, 1) +
  theme_minimal()
)
plot.show()

Aggregation

p = predictions(mod)
p["estimate"].mean()
avg_predictions(mod)
avg_predictions(mod, by="agecat")
avg_predictions(mod, newdata="balanced", by="agecat")
dat["incentive"].value_counts()
avg_predictions(mod, by="incentive")
avg_predictions(mod,
  variables={"incentive": [0, 1]},
  by="incentive")

Test

Null hypothesis tests

p = avg_predictions(mod, by="agecat")
p["estimate"][2]-p["estimate"][1]
avg_predictions(mod, by="agecat", hypothesis = "b2 - b1 = 0")
avg_predictions(mod, by="agecat",
  hypothesis = "difference ~ sequential") 
avg_predictions(mod, by="agecat",
  hypothesis = "difference ~ reference") 
avg_predictions(mod, by=["incentive","agecat"])
avg_predictions(mod, by=["incentive","agecat"],
  hypothesis = "difference ~ sequential | incentive")

Equivalence tests

avg_predictions(mod,
  by="agecat",
  hypothesis = "b2 - b0 = 0",
  equivalence = [-0.1, 0.1])

Visualization

Unit predictions

p = predictions(mod)
p = p.with_columns(pl.col("incentive").cast(pl.Utf8))
plot = (
  ggplot(p, aes(x="estimate", fill="incentive")) +
  geom_histogram() +
  labs(x="Pr(outcome=1)"))
plot.show()
plot = (
  ggplot(p, aes(x="estimate", colour="incentive")) +
  geom_line(stat='ecdf'))
plot.show()

Marginal predictions

avg_predictions(mod, by="incentive")
plot_predictions(mod, by="incentive").show()
plot_predictions(mod, by=["incentive", "agecat"]).show()
plot_predictions(mod, by="incentive",
  newdata = "balanced", draw = False)

Conditional predictions

plot_predictions(mod, condition="distance").show()
plot_predictions(mod, condition=["distance","incentive"]).show()
plot_predictions(mod,
  condition=["distance","incentive", "agecat"]
).show()
plot_predictions(mod,
  condition={"distance": None, "agecat": ">35", "incentive": 0}
).show()

Customization

plot_predictions(mod, by="incentive", draw = False)

6. Counterfactual comparisons

dat = get_dataset("thornton")
mod = logit("outcome ~ incentive * (agecat + distance)",
  data=dat.to_pandas()).fit()
mod.summary()

First steps: risk difference with a binary treatment

grid = pl.DataFrame({
    "distance": 2,
    "agecat": ["18 to 35"],
    "incentive": 1
})
# Counterfactual grids: identical rows, with incentive set to 1 or 0
g_treatment = grid.with_columns(pl.lit(1).alias("incentive"))
g_control = grid.with_columns(pl.lit(0).alias("incentive"))
p_treatment = mod.predict(g_treatment.to_pandas())
p_control = mod.predict(g_control.to_pandas())
# Risk difference: treatment minus control prediction
p_treatment - p_control
comparisons(mod, variables = "incentive", newdata = grid)

Comparison functions

comparisons(mod,
  variables = "incentive",
  comparison = "ratio",
  hypothesis = 1,
  newdata = grid)
comparisons(mod,
  variables = "incentive",
  comparison = "lift",
  hypothesis = 1,
  newdata = grid)
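
These quantities are easy to verify by hand with the counterfactual predictions p_treatment and p_control computed above, assuming the ratio is treatment over control and the lift is the risk difference divided by the control prediction:

p_treatment / p_control                  # ratio
(p_treatment - p_control) / p_control    # lift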

Predictors

Focal variables.

comparisons(mod,
  variables="incentive",
  newdata=grid)
comparisons(mod,
  variables={"incentive" : [1, 0]},
  newdata=grid)
comparisons(mod,
  variables="agecat",
  newdata=grid)
comparisons(mod,
  variables={"agecat" : ["18 to 35", ">35"]},
  newdata=grid)
# Increase of 5 units
comparisons(mod, variables={"distance": 5}, newdata=grid)
# Increase of 1 standard deviation
comparisons(mod, variables={"distance": "sd"}, newdata=grid)
# Change between specific values
comparisons(mod, variables={"distance" : [0, 3]}, newdata=grid)
# Change across the interquartile range
comparisons(mod, variables={"distance" : "iqr"}, newdata=grid)
# Change across the full range
comparisons(mod, variables={"distance": "minmax"}, newdata=grid)

Cross-comparisons

comparisons(mod,
  variables = ["incentive", "distance"],
  cross = True,
  newdata = grid)

Adjustment variables

comparisons(mod, variables = "incentive")
cmp = comparisons(mod)
cmp.shape
comparisons(mod,
  variables = "incentive", newdata = datagrid(
    agecat = dat["agecat"].unique(),
    distance = dat["distance"].mean()))    
comparisons(mod, variables = "incentive", newdata = "mean")
comparisons(mod, variables = "incentive", newdata = "balanced")

Aggregation

avg_comparisons(mod, variables = "incentive")
cmp = comparisons(mod, variables = "incentive")
cmp["estimate"].mean()
avg_comparisons(mod, variables = "incentive", by = "agecat")
avg_comparisons(mod,
  variables = "incentive",
  newdata = dat.filter(pl.col("incentive") == 1))
penguins = get_dataset("penguins", "palmerpenguins") \
  .drop_nulls(subset = "species")
penguins \
  .select("species", "body_mass_g", "flipper_length_mm") \
  .group_by("species").mean()
fit = ols(
  "flipper_length_mm ~ body_mass_g * species",
  data = penguins.to_pandas()).fit()
avg_predictions(fit, by = "species")
avg_predictions(fit, variables = "species", by = "species")
avg_comparisons(fit, variables = {"species": "sequential"})
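 
Because the model is linear and the same data are used, these sequential contrasts should match differences between the counterfactual averages computed just above; a quick check (species levels are ordered alphabetically):

p = avg_predictions(fit, variables = "species", by = "species")
p["estimate"][1] - p["estimate"][0]  # Chinstrap - Adelie
p["estimate"][2] - p["estimate"][1]  # Gentoo - Chinstrap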

Test

avg_comparisons(mod, variables = "incentive", by = "agecat")
avg_comparisons(mod, variables = "incentive", by = "agecat",
  hypothesis = "b0 - b2 = 0")

Visualization

avg_comparisons(mod,
  variables = "incentive",
  by = "agecat")
plot_comparisons(mod,
  variables = "incentive",
  by = "agecat").show()
plot_comparisons(mod,
  variables = "incentive",
  condition = "distance").show()
plot_comparisons(mod,
  variables = "incentive",
  condition = ["distance", "agecat"]).show()

7. Slopes

import polars as pl
import numpy as np
from marginaleffects import *
from plotnine import *
from statsmodels.formula.api import logit, ols

np.random.seed(48103)
N = int(1e6)
X = np.random.normal(loc=0, scale=2, size=N)
p = 1 / (1 + np.exp(-(-1 + 0.5 * X)))
Y = np.random.binomial(n=1, p=p)
dat = pl.DataFrame({"Y": Y, "X": X})
mod = logit("Y ~ X", data=dat.to_pandas()).fit()
mod.params
slopes(
  mod,
  variables="X",
  newdata=datagrid(X = [-5, 0, 10]))
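
A slope is simply the limit of a comparison with a vanishingly small step. As a sketch, we can approximate the slope at X = 0 by finite differences, where eps is an arbitrary small step size:

# Finite-difference approximation of the slope at X = 0 (sketch)
eps = 1e-5
g = pl.DataFrame({"X": [0.0, eps]})
p = mod.predict(g.to_pandas())
(p[1] - p[0]) / eps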

Predictors

dat = get_dataset("thornton")
mod = logit(
  "outcome ~ incentive * distance * I(distance**2)",
  data = dat.to_pandas()).fit()
slopes(
  mod,
  variables = "distance",
  newdata = datagrid(incentive = 1, distance = 1))
slopes(
  mod,
  variables = "distance",
  newdata = "mean")
slopes(mod, variables = "distance")

Aggregation

avg_slopes(mod, variables = "distance")
avg_slopes(mod, variables = "distance", by = "incentive")

Test

avg_slopes(mod, variables = "distance", by = "incentive")
avg_slopes(mod,
  variables = "distance",
  by = "incentive",
  hypothesis = "b0 - b1 = 0")

Visualization

plot_predictions(mod, condition = "distance").show()
plot_slopes(mod, variables = "distance",
  condition = "distance").show()
plot_slopes(mod,
  variables = "distance",
  condition = ["distance", "incentive"]).show()
plot_slopes(mod,
  variables = "distance",
  by = "incentive").show()

8. Causal inference with G-computation

Treatment effects: ATE, ATT, ATU

dat = get_dataset("lottery")
dat = dat.filter((pl.col('win_big') == 1) | (pl.col('win') == 0))
mod = ols(
  "earnings_post_avg ~ win_big * (tickets + man + work + age +"
  " education + college + year + earnings_pre_1 +"
  " earnings_pre_2 + earnings_pre_3)",
  data=dat.to_pandas()).fit()
mod.summary()
# Counterfactual datasets: nobody wins big (d0) or everybody does (d1)
d0 = dat.with_columns(pl.lit(0).alias('win_big'))
d1 = dat.with_columns(pl.lit(1).alias('win_big'))
p0 = predictions(mod, newdata = d0)
p1 = predictions(mod, newdata = d1)
# Observed treatment and both counterfactual predictions for one row
dat[5, 'win_big']
p0[5, 'estimate']
p1[5, 'estimate']
# Average predictions under each counterfactual
p0.select('estimate').mean()
p1.select('estimate').mean()
avg_predictions(mod, variables = "win_big", by = "win_big")
avg_comparisons(mod, variables = "win_big")
avg_comparisons(mod, variables = "win_big",
  newdata=dat.filter(pl.col('win_big') == 1))
avg_comparisons(mod, variables = "win_big",
  newdata=dat.filter(pl.col('win_big') == 0))

Conditional treatment effects: CATE

avg_comparisons(mod, variables="win_big", by="work")

9. Experiments

Regression adjustment

dat = get_dataset("thornton")
mod = ols("outcome ~ incentive", data = dat.to_pandas()).fit()
mod.params
avg_comparisons(mod, variables = "incentive", vcov = "HC2")
mod = ols("outcome ~ incentive * (age + distance + hiv2004)",
  data=dat.to_pandas()).fit()
mod.params
avg_comparisons(mod, variables = "incentive", vcov = "HC2")

Factorial experiments

dat = get_dataset("factorial_01")
mod = ols("Y ~ Ta + Tb + Ta:Tb", data = dat.to_pandas()).fit()
mod.params
plot_predictions(mod, by = ["Ta", "Tb"]).show()
avg_comparisons(mod, variables = "Ta",
  newdata = dat.filter(pl.col("Tb") == 0))
avg_comparisons(mod, variables=["Ta", "Tb"], cross=True)
avg_comparisons(mod, variables = "Ta", by = "Tb")
avg_comparisons(mod, variables = "Ta", by = "Tb",
  hypothesis = "b1 - b0 = 0")

10. Interactions and polynomials

Multiplicative interactions

Categorical-by-categorical

dat = get_dataset("interaction_01").to_pandas()
mod = logit("Y ~ X * M", data=dat).fit()
avg_predictions(mod, by=["X", "M"])
plot_predictions(mod, by=["M", "X"]).show()
avg_comparisons(mod, variables = "X")
avg_comparisons(mod, variables = "X", by = "M")
avg_comparisons(mod,
  variables = "X",
  by = "M",
  hypothesis = "b2 - b0 = 0")

Categorical-by-continuous

dat = get_dataset("interaction_02")
mod = logit("Y ~ X * M", data=dat.to_pandas()).fit()
mod.summary()
fivenum = lambda x: np.quantile(x, [0, 0.25, 0.5, 0.75, 1])
range = lambda x: np.quantile(x, [0, 1])  # note: shadows the built-in range()
p = predictions(mod, newdata=datagrid(X = [0, 1], M = fivenum))
plot_predictions(mod, condition = ["M", "X"]).show()
avg_comparisons(mod, variables = "X")
comparisons(mod, variables = "X", newdata=datagrid(M = range))
comparisons(mod, variables = "X",
  newdata=datagrid(M = range),
  hypothesis = "b1 - b0 = 0")

Continuous-by-continuous

dat = get_dataset("interaction_03")
mod = logit("Y ~ X * M", data=dat).fit()
predictions(mod, newdata = datagrid(
  X = [-2, 2], 
  M = [-1, 0, 1]))
plot_predictions(mod, condition=["X", "M"]).show()
avg_slopes(mod, variables = "X")
slopes(mod, variables = "X", newdata = datagrid(M = fivenum))
plot_slopes(mod, variables = "X", condition = "M").show()
slopes(mod, variables = "X", newdata=datagrid(M = range))
slopes(mod, variables = "X", newdata=datagrid(M = range),
  hypothesis = "b1 - b0 = 0")

Multiple interactions

dat = get_dataset("interaction_04")
mod = logit("Y ~ X * M1 * M2", data = dat).fit()
mod.summary()
plot_predictions(mod, by = ["X", "M1", "M2"]).show()
avg_predictions(mod, newdata = datagrid(X = 0, M1 = 0, M2 = 0))
avg_comparisons(mod, variables = "X")
avg_comparisons(mod, variables = "X", by = "M1",
  hypothesis = "b1 - b0 = 0")
avg_comparisons(mod,
  variables = "X", by = ["M2", "M1"])
avg_comparisons(mod,
  hypothesis = "b1 - b0 = 0",
  variables = "X", by = ["M2", "M1"])
avg_comparisons(mod,
  hypothesis = "b3 - b2 = 0",
  variables = "X", by = ["M2", "M1"])
avg_comparisons(mod,
  hypothesis = "(b1 - b0) - (b3 - b2) = 0",
  variables = "X", by = ["M2", "M1"])

Polynomial regression

dat = get_dataset("polynomial_01").to_pandas()
mod_linear = ols("Y ~ X", data = dat).fit()
plot_predictions(mod_linear, condition="X", points=0.05).show()
mod_cubic = ols("Y ~ X + I(X**2) + I(X**3)", data = dat).fit()
plot_predictions(mod_cubic, condition="X", points=0.05).show()
slopes(mod_cubic, variables="X", newdata=datagrid(X=[-2, 0, 2]))
dat = get_dataset("polynomial_02").to_pandas()
mod_cubic = ols("Y ~ X + I(X**2) + I(X**3)", data = dat).fit()
plot_predictions(mod_cubic, condition="X", points = 0.1).show()
mod_cubic_interaction = ols("Y ~ M * (X + I(X**2) + I(X**3))",
  data=dat).fit()
plot_predictions(mod_cubic_interaction, 
  condition=["X", "M"], points=0.1).show()
fivenum = lambda x: np.quantile(x, [0, 0.25, 0.5, 0.75, 1])
slopes(mod_cubic_interaction, variables="X",
  newdata=datagrid(M=[0, 1], X=fivenum))

13. Machine learning

from marginaleffects import *
import numpy as np
import polars.selectors as cs
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (OneHotEncoder,
                                   FunctionTransformer)
from sklearn.compose import make_column_transformer
from xgboost import XGBRegressor

airbnb = get_dataset("airbnb")

airbnb[:5, :6]

train, test = train_test_split(airbnb, train_size = 3/4)

Predictions

# Function to convert data frames into X and y matrices
def df_to_xy(data):
  y = data.select(cs.by_name("price", require_all=False))
  X = data.select(~cs.by_name("price", require_all=False))
  return y, X

# Convert categorical variables to dummies
catvar = airbnb.select(~cs.numeric()).columns
preprocessor = make_column_transformer(
  (OneHotEncoder(), catvar),
  remainder=FunctionTransformer(lambda x: x.to_numpy()),)

# Scikit-Learn pipeline: pre-process and fit
pipeline = make_pipeline(preprocessor, XGBRegressor())

# Run the pipeline
mod = fit_sklearn(
  df_to_xy,
  data=train,
  engine=pipeline,)

avg_predictions(mod, by = "unit_type", newdata = test)

plot_predictions(mod, by = ["bedrooms", "unit_type"]).show()

airbnb_subset = train.sample(10000)

grid = datagrid(
  bedrooms = np.unique,
  unit_type = np.unique,
  newdata = airbnb_subset,
  grid_type = "counterfactual")

plot_predictions(mod,
  by = ["bedrooms", "unit_type"],
  newdata = grid).show()

Counterfactual comparisons

avg_comparisons(mod, variables = {"bedrooms": 2})
avg_comparisons(mod,
  variables = ["bedrooms", "Wireless Internet"],
  cross = True)

Roadmap

At the time of writing (April 2025), the following features were not available in the Python version of marginaleffects. Please check marginaleffects.com for developments.

  • Bayesian models.
  • Mixed effects models.
  • Categorical outcome models.
  • Functions in the comparison argument of comparisons().
  • Rug plots in plot_*() functions.
  • Bootstrap, simulation-based inference, and conformal prediction.
  • Multiple comparison correction.
  • Clustered standard errors.
  • Robust standard errors and link scale predictions for logistic regression models with Statsmodels.