penguins |>
ggplot(aes(body_mass_g, flipper_length_mm)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~species)
2025-03-06
lm()
lm()
lm()
penguins_adelie <- filter(penguins, species == "Adelie")
lm_out <- lm(flipper_length_mm ~ body_mass_g, data = penguins_adelie)
summary(lm_out)
Call:
lm(formula = flipper_length_mm ~ body_mass_g, data = penguins_adelie)
Residuals:
Min 1Q Median 3Q Max
-14.2769 -3.6192 0.0569 3.4696 18.0477
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.652e+02 3.849e+00 42.929 < 2e-16 ***
body_mass_g 6.677e-03 1.032e-03 6.468 1.34e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.798 on 149 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.2192, Adjusted R-squared: 0.214
F-statistic: 41.83 on 1 and 149 DF, p-value: 1.343e-09
map()
to fit models to groups of datamap()
to fit models to groups of datapenguins |>
nest(data = -species) |>
mutate(
# apply linear model to each nested data frame
fit = map(data, ~lm(flipper_length_mm ~ body_mass_g, data = .x))
)
# A tibble: 3 × 3
species data fit
<fct> <list> <list>
1 Adelie <tibble [152 × 7]> <lm>
2 Gentoo <tibble [124 × 7]> <lm>
3 Chinstrap <tibble [68 × 7]> <lm>
map()
to fit models to groups of datalm_data <- penguins |>
nest(data = -species) |>
mutate(
# apply linear model to each nested data frame
fit = map(data, ~lm(flipper_length_mm ~ body_mass_g, data = .x))
)
lm_data$fit[[1]] # first model fit, for Adelie species
Call:
lm(formula = flipper_length_mm ~ body_mass_g, data = .x)
Coefficients:
(Intercept) body_mass_g
1.652e+02 6.677e-03
Call:
lm(formula = flipper_length_mm ~ body_mass_g, data = .x)
Residuals:
Min 1Q Median 3Q Max
-14.2769 -3.6192 0.0569 3.4696 18.0477
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.652e+02 3.849e+00 42.929 < 2e-16 ***
body_mass_g 6.677e-03 1.032e-03 6.468 1.34e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.798 on 149 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.2192, Adjusted R-squared: 0.214
F-statistic: 41.83 on 1 and 149 DF, p-value: 1.343e-09
Call:
lm(formula = flipper_length_mm ~ body_mass_g, data = .x)
Residuals:
Min 1Q Median 3Q Max
-12.0194 -2.7401 0.1781 2.9859 8.9806
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.713e+02 4.244e+00 40.36 <2e-16 ***
body_mass_g 9.039e-03 8.321e-04 10.86 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.633 on 121 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.4937, Adjusted R-squared: 0.4896
F-statistic: 118 on 1 and 121 DF, p-value: < 2.2e-16
Call:
lm(formula = flipper_length_mm ~ body_mass_g, data = .x)
Residuals:
Min 1Q Median 3Q Max
-14.4296 -3.3315 0.4097 2.8889 11.5941
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.514e+02 6.575e+00 23.024 < 2e-16 ***
body_mass_g 1.191e-02 1.752e-03 6.795 3.75e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.512 on 66 degrees of freedom
Multiple R-squared: 0.4116, Adjusted R-squared: 0.4027
F-statistic: 46.17 on 1 and 66 DF, p-value: 3.748e-09
glance()
and tidy()
glance()
provides model-wide summary estimates in tidy format# A tibble: 1 × 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.219 0.214 5.80 41.8 0.00000000134 1 -479. 963. 972.
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
tidy()
provides information about regression coefficients in tidy formatmap()
map()
# A tibble: 3 × 4
species data fit glance_out
<fct> <list> <list> <list>
1 Adelie <tibble [152 × 7]> <lm> <tibble [1 × 12]>
2 Gentoo <tibble [124 × 7]> <lm> <tibble [1 × 12]>
3 Chinstrap <tibble [68 × 7]> <lm> <tibble [1 × 12]>
map()
map()
lm_data |>
mutate(
glance_out = map(fit, glance) # apply `glance()` to each fitted model
) |>
select(species, glance_out) |> # only keep species name and glance output
unnest(cols = glance_out)
# A tibble: 3 × 13
species r.squared adj.r.squared sigma statistic p.value df logLik AIC
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Adelie 0.219 0.214 5.80 41.8 1.34e- 9 1 -479. 963.
2 Gentoo 0.494 0.490 4.63 118. 1.33e-19 1 -362. 730.
3 Chinstrap 0.412 0.403 5.51 46.2 3.75e- 9 1 -212. 429.
# ℹ 4 more variables: BIC <dbl>, deviance <dbl>, df.residual <int>, nobs <int>
lm_summary <- penguins |>
nest(data = -species) |>
mutate(
fit = map(data, ~lm(flipper_length_mm ~ body_mass_g, data = .x)),
glance_out = map(fit, glance)
) |>
select(species, glance_out) |>
unnest(cols = glance_out)
lm_summary
# A tibble: 3 × 13
species r.squared adj.r.squared sigma statistic p.value df logLik AIC
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Adelie 0.219 0.214 5.80 41.8 1.34e- 9 1 -479. 963.
2 Gentoo 0.494 0.490 4.63 118. 1.33e-19 1 -362. 730.
3 Chinstrap 0.412 0.403 5.51 46.2 3.75e- 9 1 -212. 429.
# ℹ 4 more variables: BIC <dbl>, deviance <dbl>, df.residual <int>, nobs <int>
label_data <- lm_summary |>
mutate(
rsqr = signif(r.squared, 2), # round to 2 significant digits
pval = signif(p.value, 2),
label = glue("R^2 = {rsqr}, P = {pval}"),
body_mass_g = 6400, flipper_length_mm = 175 # label position in plot
) |>
select(species, label, body_mass_g, flipper_length_mm)
label_data
# A tibble: 3 × 4
species label body_mass_g flipper_length_mm
<fct> <glue> <dbl> <dbl>
1 Adelie R^2 = 0.22, P = 1.3e-09 6400 175
2 Gentoo R^2 = 0.49, P = 1.3e-19 6400 175
3 Chinstrap R^2 = 0.41, P = 3.7e-09 6400 175