Simulated data for the Model to Meaning book

The code below generates simulated datasets used in the book’s examples and tutorials.

# Multiplicative interactions: binary X x three-level categorical M
#
# Simulates a binary outcome Y from a logistic model in which the effect of
# the binary variable X differs across the levels "a", "b", "c" of M.
# Coefficient layout (all drawn from Uniform(-1, 1)):
#   b[1]    intercept
#   b[2]    X
#   b[3:5]  indicators for M == "a", M == "b", M == "c"
#   b[6:8]  X times the indicators for M == "a", M == "b", M == "c"
# The data frame (Y, X, M) is written to interaction_01.csv and
# interaction_01.parquet in the working directory.
set.seed(1024)
N <- 5000
X <- rbinom(N, 1, .5)
M <- sample(c("a", "b", "c"), N, replace = TRUE)
b <- runif(8, -1, 1)
# Linear predictor on the logit scale. Each of b[3:5] is paired with exactly
# one level indicator, and each of b[6:8] with the PRODUCT of X and one level
# indicator; writing `+` instead of `*` in an interaction term would add the
# raw indicator to the logit and turn the coefficient into a second X effect.
eta <- b[1] + b[2] * X +
  b[3] * (M == "a") + b[4] * (M == "b") + b[5] * (M == "c") +
  b[6] * X * (M == "a") + b[7] * X * (M == "b") +
  b[8] * X * (M == "c")
Y <- rbinom(N, 1, prob = plogis(eta))
dat <- data.frame(Y, X, M)
data.table::fwrite(dat, "interaction_01.csv")
nanoparquet::write_parquet(dat, "interaction_01.parquet")

# Multiplicative interactions: binary X x continuous M
#
# Logistic data-generating process with intercept, X, M and the X * M
# product; the four coefficients are drawn from Uniform(-1, 1).
# Output: interaction_02.csv and interaction_02.parquet.
set.seed(1024)
N <- 5000
X <- rbinom(n = N, size = 1, prob = 0.5)
M <- rnorm(n = N)
b <- runif(n = 4, min = -1, max = 1)

# Linear predictor on the logit scale, then success probability per row.
logit_y <- b[1] + b[2] * X + b[3] * M + b[4] * X * M
prob_y <- plogis(logit_y)
Y <- rbinom(n = N, size = 1, prob = prob_y)

dat <- data.frame(Y = Y, X = X, M = M)
data.table::fwrite(dat, "interaction_02.csv")
nanoparquet::write_parquet(dat, "interaction_02.parquet")

# Multiplicative interactions: continuous X x continuous M
#
# Logistic data-generating process with intercept, X, M and the X * M
# product, where both X and M are standard normal draws; the four
# coefficients are drawn from Uniform(-1, 1).
# Output: interaction_03.csv and interaction_03.parquet.
set.seed(1024)
N <- 5000
X <- rnorm(n = N)
M <- rnorm(n = N)
b <- runif(n = 4, min = -1, max = 1)

# Linear predictor on the logit scale, then success probability per row.
logit_y <- b[1] + b[2] * X + b[3] * M + b[4] * X * M
prob_y <- plogis(logit_y)
Y <- rbinom(n = N, size = 1, prob = prob_y)

dat <- data.frame(Y = Y, X = X, M = M)
data.table::fwrite(dat, "interaction_03.csv")
nanoparquet::write_parquet(dat, "interaction_03.parquet")

# Multiplicative interactions: X x M1 x M2 (all binary)
#
# Fully saturated logistic model: intercept, three main effects, three
# two-way products and the three-way product. The eight coefficients are
# drawn from Uniform(-1, 1).
# Output: interaction_04.csv and interaction_04.parquet.
set.seed(1024)
N <- 5000
X <- rbinom(n = N, size = 1, prob = 0.5)
M1 <- rbinom(n = N, size = 1, prob = 0.5)
M2 <- rbinom(n = N, size = 1, prob = 0.5)
b <- runif(n = 8, min = -1, max = 1)

# Linear predictor on the logit scale, grouped by interaction order.
logit_y <- b[1] +
  b[2] * X + b[3] * M1 + b[4] * M2 +
  b[5] * X * M1 + b[6] * X * M2 + b[7] * M1 * M2 +
  b[8] * X * M1 * M2
prob_y <- plogis(logit_y)
Y <- rbinom(n = N, size = 1, prob = prob_y)

dat <- data.frame(Y = Y, X = X, M1 = M1, M2 = M2)
data.table::fwrite(dat, "interaction_04.csv")
nanoparquet::write_parquet(dat, "interaction_04.parquet")

# Polynomial regression: X and Y
#
# Y is an inverted parabola in X (2.5 - X^2) plus Gaussian noise with
# standard deviation 0.5; X is uniform on [-3, 3].
# Output: polynomial_01.csv and polynomial_01.parquet.
set.seed(1024)
N <- 1000

# True conditional mean of Y given X.
FUN <- function(X) {
  2.5 - X^2
}

X <- runif(n = N, min = -3, max = 3)
noise <- rnorm(n = N, mean = 0, sd = 0.5)
Y <- FUN(X) + noise

dat <- data.frame(X = X, Y = Y)
data.table::fwrite(dat, "polynomial_01.csv")
nanoparquet::write_parquet(dat, "polynomial_01.parquet")

# Polynomial regression: X, M, and Y
#
# Quadratic relationship between X and Y whose shape depends on the binary
# moderator M:
#   M == 0:  E[Y | X] =  2.5 - X^2
#   M == 1:  E[Y | X] = -2.5 + X^2
# X is uniform on [-3, 3]; the noise is Gaussian with standard deviation 0.5.
# Output: polynomial_02.csv and polynomial_02.parquet.
set.seed(1024)
N <- 1e3
X <- runif(N, min = -3, max = 3)
M <- rbinom(N, size = 1, prob = .5)
# `sd` must be named: the second positional argument of rnorm() is `mean`,
# so rnorm(N, .5) would shift every outcome by 0.5 and use sd = 1.
Y <- 2.5 - X^2 - 5 * M + 2 * M * X^2 + rnorm(N, sd = .5)
dat <- data.frame(X, M, Y)
data.table::fwrite(dat, "polynomial_02.csv")
nanoparquet::write_parquet(dat, "polynomial_02.parquet")


# Factorial experiment
#
# Builds a two-factor dataset from the built-in `mtcars` data: `mpg` becomes
# the outcome Y, `vs` becomes factor Ta and `am` becomes factor Tb. Rows are
# ordered by Ta, then Tb.
# Output: factorial_01.csv and factorial_01.parquet.
dat <- mtcars[, c("mpg", "vs", "am")]
names(dat) <- c("Y", "Ta", "Tb")
row_order <- order(dat$Ta, dat$Tb)
dat <- dat[row_order, , drop = FALSE]
data.table::fwrite(dat, "factorial_01.csv")
nanoparquet::write_parquet(dat, "factorial_01.parquet")