diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..8b02363 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +_extensions diff --git a/docs/index.html b/docs/index.html index f01b337..f3841dc 100644 --- a/docs/index.html +++ b/docs/index.html @@ -68,8 +68,11 @@

Generalised Linear Models in R

Links to interactive tutorials (made with Quarto and webR):

+

Linear Models

diff --git a/docs/index.qmd b/docs/index.qmd index 9bd1d8a..6315416 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -6,4 +6,10 @@ format: html Links to interactive tutorials (made with Quarto and webR): -- [Linear Models](lm_webR.html) +**Linear Models** + +- [Interactive tutorial](lm_webR.html) + +- [Rendered document](lm.html) + +- [R code](lm.R) diff --git a/docs/lm.R b/docs/lm.R new file mode 100644 index 0000000..d2e8b4a --- /dev/null +++ b/docs/lm.R @@ -0,0 +1,490 @@ +## ---------------------------------------------------------------------------------------------------- +download.file("https://raw.githubusercontent.com/Pakillo/LM-GLM-GLMM-intro/trees/data/trees.csv", + destfile = "trees.csv", mode = "wb") + + +## ---------------------------------------------------------------------------------------------------- +trees <- read.csv("trees.csv") +head(trees) + + +## ---------------------------------------------------------------------------------------------------- +plot(trees$height) + + +## ---------------------------------------------------------------------------------------------------- +hist(trees$height) + + +## ---------------------------------------------------------------------------------------------------- +hist(trees$dbh) + + +## ---------------------------------------------------------------------------------------------------- +plot(height ~ dbh, data = trees, las = 1) + + +## ---------------------------------------------------------------------------------------------------- +m1 <- lm(height ~ dbh, data = trees) + + +## ---------------------------------------------------------------------------------------------------- +#| eval: false +## library("equatiomatic") +## m1 <- lm(height ~ dbh, data = trees) +## equatiomatic::extract_eq(m1) + + +## ---------------------------------------------------------------------------------------------------- +#| eval: false +## equatiomatic::extract_eq(m1, use_coefs = TRUE) + + +## 
---------------------------------------------------------------------------------------------------- +summary(m1) + + +## ---------------------------------------------------------------------------------------------------- +library("easystats") + + +## ---------------------------------------------------------------------------------------------------- +parameters(m1)[1,] + + +## ---------------------------------------------------------------------------------------------------- +plot(simulate_parameters(m1), show_intercept = TRUE) + + +## ---------------------------------------------------------------------------------------------------- +parameters::parameters(m1)[2,] + + +## ---------------------------------------------------------------------------------------------------- +plot(simulate_parameters(m1)) + + +## ---------------------------------------------------------------------------------------------------- +hist(residuals(m1)) + + +## ---------------------------------------------------------------------------------------------------- +coef(m1) + + +## ---------------------------------------------------------------------------------------------------- +confint(m1) + + +## ---------------------------------------------------------------------------------------------------- +parameters(m1) + + +## ---------------------------------------------------------------------------------------------------- +report(m1) + + +## ---------------------------------------------------------------------------------------------------- +library("modelsummary") + + +## ---------------------------------------------------------------------------------------------------- +modelsummary(m1, output = "html") ## Word, PDF, PowerPoint, png... 
+ + +## ---------------------------------------------------------------------------------------------------- +modelsummary(m1, fmt = 2, + estimate = "{estimate} ({std.error})", + statistic = NULL, + gof_map = c("nobs", "r.squared", "rmse"), + output = "html") + + +## ---------------------------------------------------------------------------------------------------- +library("visreg") + + +## ---------------------------------------------------------------------------------------------------- +visreg(m1) + + +## ---------------------------------------------------------------------------------------------------- +#| eval: false +## visreg(m1, gg = TRUE) + theme_bw() + + +## ---------------------------------------------------------------------------------------------------- +plot(estimate_expectation(m1)) + + +## ---------------------------------------------------------------------------------------------------- +modelplot(m1) + + +## ---------------------------------------------------------------------------------------------------- +plot(parameters(m1), show_intercept = TRUE, show_labels = TRUE) + + +## ---------------------------------------------------------------------------------------------------- +plot(simulate_parameters(m1)) + + +## ---------------------------------------------------------------------------------------------------- +hist(residuals(m1)) + + +## ---------------------------------------------------------------------------------------------------- +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow=2)) +plot(m1) +par(def.par) + + +## ---------------------------------------------------------------------------------------------------- +check_model(m1) + + +## ---------------------------------------------------------------------------------------------------- +#| eval: false +## model_dashboard(m1) + + +## ---------------------------------------------------------------------------------------------------- +pred <- estimate_expectation(m1) 
+head(pred) + + +## ---------------------------------------------------------------------------------------------------- +plot(estimate_expectation(m1)) + + +## ---------------------------------------------------------------------------------------------------- +pred$height.obs <- trees$height +plot(height.obs ~ Predicted, data = pred, xlim = c(15, 60), ylim = c(15, 60)) +abline(a = 0, b = 1) + + +## ---------------------------------------------------------------------------------------------------- +pred <- estimate_prediction(m1) +head(pred) + + +## ---------------------------------------------------------------------------------------------------- +plot(estimate_expectation(m1)) + + +## ---------------------------------------------------------------------------------------------------- +plot(estimate_prediction(m1)) + + +## ---------------------------------------------------------------------------------------------------- +estimate_expectation(m1, data = data.frame(dbh = 39)) + + +## ---------------------------------------------------------------------------------------------------- +estimate_prediction(m1, data = data.frame(dbh = 39)) + + +## ---------------------------------------------------------------------------------------------------- +boxplot(height ~ sex, data = trees) + + +## ---------------------------------------------------------------------------------------------------- +m2 <- lm(height ~ sex, data = trees) +summary(m2) + + +## ---------------------------------------------------------------------------------------------------- +m2 <- lm(height ~ sex, data = trees) + + +## ---------------------------------------------------------------------------------------------------- +m2 <- lm(height ~ sex, data = trees) +summary(m2) + + +## ---------------------------------------------------------------------------------------------------- +report(m2) + + +## 
---------------------------------------------------------------------------------------------------- +parameters(m2)[1,] + + +## ---------------------------------------------------------------------------------------------------- +plot(simulate_parameters(m2), show_intercept = TRUE) + + +## ---------------------------------------------------------------------------------------------------- +parameters(m2)[2,] + + +## ---------------------------------------------------------------------------------------------------- +plot(simulate_parameters(m2)) + + +## ---------------------------------------------------------------------------------------------------- +estimate_means(m2) + + +## ---------------------------------------------------------------------------------------------------- +estimate_contrasts(m2) + + +## ---------------------------------------------------------------------------------------------------- +visreg(m2) + + +## ---------------------------------------------------------------------------------------------------- +plot(estimate_means(m2)) + + +## ---------------------------------------------------------------------------------------------------- +hist(resid(m2)) + + +## ---------------------------------------------------------------------------------------------------- +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow=2)) +plot(m2) +par(def.par) + + +## ---------------------------------------------------------------------------------------------------- +check_model(m2) + + +## ---------------------------------------------------------------------------------------------------- +plot(height ~ site, data = trees) + + +## ---------------------------------------------------------------------------------------------------- +m3 <- lm(height ~ site, data = trees) + + +## ---------------------------------------------------------------------------------------------------- +m3 <- lm(height ~ site, data = trees) +summary(m3) + + +## 
---------------------------------------------------------------------------------------------------- +trees$site <- as.factor(trees$site) + + +## ---------------------------------------------------------------------------------------------------- +m3 <- lm(height ~ site, data = trees) +summary(m3) + + +## ---------------------------------------------------------------------------------------------------- +plot(simulate_parameters(m3)) + + +## ---------------------------------------------------------------------------------------------------- +estimate_means(m3) + + +## ---------------------------------------------------------------------------------------------------- +plot(estimate_means(m3)) + + +## ---------------------------------------------------------------------------------------------------- +estimate_contrasts(m3) + + +## ---------------------------------------------------------------------------------------------------- +library("marginaleffects") + + +## ---------------------------------------------------------------------------------------------------- +hypotheses(m3, "site2 = site9") + + +## ---------------------------------------------------------------------------------------------------- +parameters(m3) + + +## ---------------------------------------------------------------------------------------------------- +modelsummary(m3, estimate = "{estimate} ({std.error})", statistic = NULL, + fmt = 1, gof_map = NA, coef_rename = paste0("site", 1:10), output = "html") + + +## ---------------------------------------------------------------------------------------------------- +visreg(m3) + + +## ---------------------------------------------------------------------------------------------------- +plot(estimate_means(m3)) + + +## ---------------------------------------------------------------------------------------------------- +modelplot(m3) + + +## ---------------------------------------------------------------------------------------------------- 
+plot(parameters(m3), show_intercept = TRUE) + + +## ---------------------------------------------------------------------------------------------------- +m3bis <- lm(height ~ site - 1, data = trees) +summary(m3bis) + + +## ---------------------------------------------------------------------------------------------------- +plot(parameters(m3bis)) + + +## ---------------------------------------------------------------------------------------------------- +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow = 2)) +plot(m3) +par(def.par) + + +## ---------------------------------------------------------------------------------------------------- +check_model(m3) + + +## ---------------------------------------------------------------------------------------------------- +lm(height ~ site + dbh, data = trees) + + +## ---------------------------------------------------------------------------------------------------- +m4 <- lm(height ~ site + dbh, data = trees) +summary(m4) + + +## ---------------------------------------------------------------------------------------------------- +parameters(m4) + + +## ---------------------------------------------------------------------------------------------------- +estimate_means(m4) + + +## ---------------------------------------------------------------------------------------------------- +m4 <- lm(height ~ -1 + site + dbh, data = trees) +summary(m4) + + +## ---------------------------------------------------------------------------------------------------- +visreg(m4) + + +## ---------------------------------------------------------------------------------------------------- +visreg(m4, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE) + + +## ---------------------------------------------------------------------------------------------------- +plot(parameters(m4)) + + +## ---------------------------------------------------------------------------------------------------- +plot(parameters(m4, drop = "dbh")) + + +## 
---------------------------------------------------------------------------------------------------- +modelplot(m4) + + +## ---------------------------------------------------------------------------------------------------- +modelplot(m4, coef_omit = "dbh") + + +## ---------------------------------------------------------------------------------------------------- +visreg(m3) + + +## ---------------------------------------------------------------------------------------------------- +visreg(m4, xvar = "site") + + +## ---------------------------------------------------------------------------------------------------- +boxplot(dbh ~ site, data = trees) + + +## ---------------------------------------------------------------------------------------------------- +aggregate(trees$dbh ~ trees$site, FUN = mean) + + +## ---------------------------------------------------------------------------------------------------- +aggregate(trees$height ~ trees$site, FUN = mean) + + +## ---------------------------------------------------------------------------------------------------- +visreg(m4, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE) + + +## ---------------------------------------------------------------------------------------------------- +parameters(m4, keep = "dbh") + + +## ---------------------------------------------------------------------------------------------------- +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow=2)) +plot(m4) +par(def.par) + + +## ---------------------------------------------------------------------------------------------------- +check_model(m4) + + +## ---------------------------------------------------------------------------------------------------- +trees$height.pred <- fitted(m4) +plot(trees$height.pred, trees$height, + xlab = "Tree height (predicted)", + ylab = "Tree height (observed)", + las = 1, xlim = c(10,60), ylim = c(10,60)) +abline(a = 0, b = 1) + + +## 
---------------------------------------------------------------------------------------------------- +pred <- estimate_expectation(m4) +pred$obs <- trees$height +plot(obs ~ Predicted, data = pred, xlim = c(15, 60), ylim = c(15, 60)) +abline(a = 0, b = 1) + + +## ---------------------------------------------------------------------------------------------------- +check_predictions(m4) + + +## ---------------------------------------------------------------------------------------------------- +trees.10cm <- data.frame(site = as.factor(1:10), + dbh = 10) +trees.10cm + + +## ---------------------------------------------------------------------------------------------------- +pred <- estimate_expectation(m4, data = trees.10cm) +pred + + +## ---------------------------------------------------------------------------------------------------- +pred <- estimate_prediction(m4, data = trees.10cm) +pred + + +## ---------------------------------------------------------------------------------------------------- +df <- data.frame(dbh = seq(10, 50, by = 1), + height = seq(20, 60, by = 1)) + +plot(height ~ dbh, data = df, type = "n") +abline(a = 25, 0.6) +abline(a = 40, b = 0.1, col = "steelblue") +abline(a = 50, b = -0.3, col = "orangered") + + +## ---------------------------------------------------------------------------------------------------- +m5 <- lm(height ~ site*dbh, data = trees) +summary(m5) + + +## ---------------------------------------------------------------------------------------------------- +visreg(m5, xvar = "dbh", by = "site") + + +## ---------------------------------------------------------------------------------------------------- +visreg(m5, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE) + diff --git a/docs/lm.html b/docs/lm.html new file mode 100644 index 0000000..673d425 --- /dev/null +++ b/docs/lm.html @@ -0,0 +1,2293 @@ + + + + + + + + + + +Linear models + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+

Linear models

+
+ + + +
+ +
+
Author
+
+

Francisco Rodríguez-Sánchez

+
+
+ + + +
+ + +
+ +
+

A simple linear model

+
+

Example dataset: forest trees

+ +
+
download.file("https://raw.githubusercontent.com/Pakillo/LM-GLM-GLMM-intro/trees/data/trees.csv",
+              destfile = "trees.csv", mode = "wb")
+
+
    +
  • Import:
  • +
+
+
trees <- read.csv("trees.csv")
+head(trees)
+
+
  site   dbh height    sex dead
+1    4 29.68   36.1   male    0
+2    5 33.29   42.3   male    0
+3    2 28.03   41.9 female    0
+4    5 39.86   46.5 female    0
+5    1 47.94   43.9 female    0
+6    1 10.82   26.2   male    0
+
+
+
+
+

Questions

+
    +
  • What is the relationship between DBH and height?

  • +
  • Do taller trees have bigger trunks?

  • +
  • Can we predict height from DBH? How well?

  • +
+
+
+

Plot your data first!

+
+

Exploratory Data Analysis (EDA)

+

Outliers

+
+
plot(trees$height)
+
+

+
+
+
+
+

Histogram of response variable

+
+
hist(trees$height)
+
+

+
+
+
+
+

Histogram of predictor variable

+
+
hist(trees$dbh)
+
+

+
+
+
+
+

Scatterplot

+
+
plot(height ~ dbh, data = trees, las = 1)
+
+

+
+
+
+
+
+

Model fitting

+
+

Now fit model

+

Hint: lm

+
+
m1 <- lm(height ~ dbh, data = trees)
+
+

which corresponds to

+

\[ + \begin{aligned} + Height_{i} = a + b \cdot DBH_{i} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +\]

+
+
+

Package equatiomatic returns model structure

+
+
library("equatiomatic")
+m1 <- lm(height ~ dbh, data = trees)
+equatiomatic::extract_eq(m1)
+
+
+
equatiomatic::extract_eq(m1, use_coefs = TRUE)
+
+
+
+
+

Model interpretation

+
+

What does this mean?

+
+
summary(m1)
+
+

+Call:
+lm(formula = height ~ dbh, data = trees)
+
+Residuals:
+     Min       1Q   Median       3Q      Max 
+-13.3270  -2.8978   0.1057   2.7924  12.9511 
+
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)    
+(Intercept) 19.33920    0.31064   62.26   <2e-16 ***
+dbh          0.61570    0.01013   60.79   <2e-16 ***
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+Residual standard error: 4.093 on 998 degrees of freedom
+Multiple R-squared:  0.7874,    Adjusted R-squared:  0.7871 
+F-statistic:  3695 on 1 and 998 DF,  p-value: < 2.2e-16
+
+
+
+
+

Estimated distribution of the intercept parameter

+
+
library("easystats")
+
+
# Attaching packages: easystats 0.7.0
+✔ bayestestR  0.13.1   ✔ correlation 0.8.4 
+✔ datawizard  0.9.0    ✔ effectsize  0.8.6 
+✔ insight     0.19.6   ✔ modelbased  0.8.6 
+✔ performance 0.10.8   ✔ parameters  0.21.3
+✔ report      0.5.7    ✔ see         0.8.1 
+
+
+
+
parameters(m1)[1,]
+
+
Parameter   | Coefficient |   SE |         95% CI | t(998) |      p
+-------------------------------------------------------------------
+(Intercept) |       19.34 | 0.31 | [18.73, 19.95] |  62.26 | < .001
+
+
+

+Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+  using a Wald t-distribution approximation.
+
+
+
+
plot(simulate_parameters(m1), show_intercept = TRUE)
+
+

+
+
+
+
+

Estimated distribution of the slope parameter

+
+
parameters::parameters(m1)[2,]
+
+
Parameter | Coefficient |   SE |       95% CI | t(998) |      p
+---------------------------------------------------------------
+dbh       |        0.62 | 0.01 | [0.60, 0.64] |  60.79 | < .001
+
+
+

+Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+  using a Wald t-distribution approximation.
+
+
+
+
plot(simulate_parameters(m1))
+
+

+
+
+
+
+

Distribution of residuals

+
+
hist(residuals(m1))
+
+

+
+
+
+
+

Degrees of freedom

+

DF = n - p

+

n = sample size

+

p = number of estimated parameters

+
+
+

R-squared

+

Proportion of ‘explained’ variance

+

\(R^{2} = 1 - \frac{Residual Variation}{Total Variation}\)

+
+
+

Adjusted R-squared

+

Accounts for model complexity

+

(number of parameters)

+

\(R^2_{adj} = 1 - (1 - R^2) \frac{n - 1}{n - p}\)

+
+
+

Quiz

+

https://pollev.com/franciscorod726

+
+
+

Retrieving model coefficients

+
+
coef(m1)
+
+
(Intercept)         dbh 
+ 19.3391968   0.6157036 
+
+
+
+
+

Confidence intervals for parameters

+
+
confint(m1)
+
+
                 2.5 %    97.5 %
+(Intercept) 18.7296053 19.948788
+dbh          0.5958282  0.635579
+
+
+
+
+

Retrieving model parameters (easystats)

+
+
parameters(m1)
+
+
Parameter   | Coefficient |   SE |         95% CI | t(998) |      p
+-------------------------------------------------------------------
+(Intercept) |       19.34 | 0.31 | [18.73, 19.95] |  62.26 | < .001
+dbh         |        0.62 | 0.01 | [ 0.60,  0.64] |  60.79 | < .001
+
+
+

+Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+  using a Wald t-distribution approximation.
+
+
+

https://easystats.github.io/parameters/

+
+
+
+

Communicating results

+
+

Avoid dichotomania of statistical significance

+
    +
  • “Never conclude there is ‘no difference’ or ‘no association’ just because p > 0.05 or CI includes zero”

  • +
  • Estimate and communicate effect sizes and their uncertainty

  • +
  • https://doi.org/10.1038/d41586-019-00857-9

  • +
+
+
+

Communicating results

+
    +
  • We found a significant relationship between DBH and Height (p<0.05).

  • +
  • We found a {significant} positive relationship between DBH and Height {(p<0.05)} (b = 0.62, SE = 0.01).

  • +
  • (add p-value if you wish)

  • +
+
+
+

Models that describe themselves (easystats)

+
+
report(m1)
+
+
We fitted a linear model (estimated using OLS) to predict height with dbh
+(formula: height ~ dbh). The model explains a statistically significant and
+substantial proportion of variance (R2 = 0.79, F(1, 998) = 3695.40, p < .001,
+adj. R2 = 0.79). The model's intercept, corresponding to dbh = 0, is at 19.34
+(95% CI [18.73, 19.95], t(998) = 62.26, p < .001). Within this model:
+
+  - The effect of dbh is statistically significant and positive (beta = 0.62, 95%
+CI [0.60, 0.64], t(998) = 60.79, p < .001; Std. beta = 0.89, 95% CI [0.86,
+0.92])
+
+Standardized parameters were obtained by fitting the model on a standardized
+version of the dataset. 95% Confidence Intervals (CIs) and p-values were
+computed using a Wald t-distribution approximation.
+
+
+

https://easystats.github.io/report/

+
+
+

Generating table with model results: modelsummary

+
+
library("modelsummary")
+
+

+Attaching package: 'modelsummary'
+
+
+
The following object is masked from 'package:parameters':
+
+    supported_models
+
+
+
The following object is masked from 'package:insight':
+
+    supported_models
+
+
+
+
modelsummary(m1, output = "html")  ## Word, PDF, PowerPoint, png...
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 (1)
(Intercept)19.339
(0.311)
dbh0.616
(0.010)
Num.Obs.1000
R20.787
R2 Adj.0.787
AIC5660.3
BIC5675.0
Log.Lik.−2827.125
F3695.395
RMSE4.09
+ + +
+
+

https://modelsummary.com/

+
+
+

Generating table with model results: modelsummary

+
+
modelsummary(m1, fmt = 2, 
+             estimate = "{estimate} ({std.error})", 
+             statistic = NULL,
+             gof_map = c("nobs", "r.squared", "rmse"),
+             output = "html")
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 (1)
(Intercept)19.34 (0.31)
dbh0.62 (0.01)
Num.Obs.1000
R20.787
RMSE4.09
+ + +
+
+
+
+
+

Visualising fitted model

+
+

Plot model: visreg

+
+
library("visreg")
+
+
+
visreg(m1)
+
+

+
+
+

visreg can use ggplot2 too

+
+
visreg(m1, gg = TRUE) + theme_bw()
+
+

https://pbreheny.github.io/visreg

+
+
+

Plot (easystats)

+
+
plot(estimate_expectation(m1))
+
+

+
+
+
+
+

Plot (modelsummary)

+
+
modelplot(m1)
+
+

+
+
+
+
+

Plot model parameters with easystats (see package)

+
+
plot(parameters(m1), show_intercept = TRUE, show_labels = TRUE)
+
+

+
+
+
+
+

Plot parameters’ estimated distribution

+
+
plot(simulate_parameters(m1))
+
+

+
+
+
+
+
+

Model checking

+
+

Linear model assumptions

+
    +
  • Linearity (transformations, GAM…)

  • +
  • Residuals:

    +
      +
    • Independent
    • +
    • Equal variance
    • +
    • Normal
    • +
  • +
  • Negligible measurement error in predictors

  • +
+
+
+

Are residuals normal?

+
+
hist(residuals(m1))
+
+

+
+
+

SD = 4.09

+
+
+

Model checking: plot(model)

+
+
def.par <- par(no.readonly = TRUE)
+layout(matrix(1:4, nrow=2))
+plot(m1)
+
+

+
+
par(def.par)
+
+
+
+

Model checking with performance (easystats)

+
+
check_model(m1)
+
+

+
+
+

https://easystats.github.io/performance/articles/check_model.html

+
+
+

A dashboard to explore the full model

+
+
model_dashboard(m1)
+
+
+
+
+

Making predictions with easystats

+
+

Estimate expected values

+
+
pred <- estimate_expectation(m1)
+head(pred)
+
+
Model-based Expectation
+
+dbh   | Predicted |   SE |         95% CI | Residuals
+-----------------------------------------------------
+29.68 |     37.61 | 0.13 | [37.36, 37.87] |     -1.51
+33.29 |     39.84 | 0.14 | [39.56, 40.11] |      2.46
+28.03 |     36.60 | 0.13 | [36.34, 36.85] |      5.30
+39.86 |     43.88 | 0.18 | [43.53, 44.23] |      2.62
+47.94 |     48.86 | 0.24 | [48.38, 49.33] |     -4.96
+10.82 |     26.00 | 0.22 | [25.58, 26.42] |      0.20
+
+Variable predicted: height
+
+
+
+
+

Expected values given DBH

+
+
plot(estimate_expectation(m1))
+
+

+
+
+
+
+

Calibration plot: observed vs predicted

+
+
pred$height.obs <- trees$height
+plot(height.obs ~ Predicted, data = pred, xlim = c(15, 60), ylim = c(15, 60))
+abline(a = 0, b = 1)
+
+

+
+
+
+
+

Estimate prediction interval

+

Accounting for residual variation!

+
+
pred <- estimate_prediction(m1)
+head(pred)
+
+
Model-based Prediction
+
+dbh   | Predicted |   SE |         95% CI | Residuals
+-----------------------------------------------------
+29.68 |     37.61 | 4.09 | [29.58, 45.65] |     -1.51
+33.29 |     39.84 | 4.10 | [31.80, 47.87] |      2.46
+28.03 |     36.60 | 4.09 | [28.56, 44.63] |      5.30
+39.86 |     43.88 | 4.10 | [35.84, 51.92] |      2.62
+47.94 |     48.86 | 4.10 | [40.81, 56.90] |     -4.96
+10.82 |     26.00 | 4.10 | [17.96, 34.04] |      0.20
+
+Variable predicted: height
+
+
+
+
+

Confidence vs Prediction interval

+
+
plot(estimate_expectation(m1))
+
+

+
+
+
+
plot(estimate_prediction(m1))
+
+

+
+
+
+
+

Make predictions for new data

+
+
estimate_expectation(m1, data = data.frame(dbh = 39))
+
+
Model-based Expectation
+
+dbh   | Predicted |   SE |         95% CI
+-----------------------------------------
+39.00 |     43.35 | 0.17 | [43.01, 43.69]
+
+Variable predicted: height
+
+
+
+
estimate_prediction(m1, data = data.frame(dbh = 39))
+
+
Model-based Prediction
+
+dbh   | Predicted |   SE |         95% CI
+-----------------------------------------
+39.00 |     43.35 | 4.10 | [35.31, 51.39]
+
+Variable predicted: height
+
+
+
+
+
+

Workflow

+
    +
  • Visualise data

  • +
  • Understand fitted model (summary)

  • +
  • Visualise model (visreg…)

  • +
  • Check model (plot, check_model, calibration plot…)

  • +
  • Predict (predict, estimate_expectation, estimate_prediction)

  • +
+
+
+
+

Categorical predictors (factors)

+
+

Q: Does tree height vary with sex?

+
+
boxplot(height ~ sex, data = trees)
+
+

+
+
+
+

Model height ~ sex

+
+
m2 <- lm(height ~ sex, data = trees)
+summary(m2)
+
+

+Call:
+lm(formula = height ~ sex, data = trees)
+
+Residuals:
+     Min       1Q   Median       3Q      Max 
+-22.6881  -6.7881  -0.0097   6.7261  22.3687 
+
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)    
+(Intercept)  36.9312     0.3981  92.778   <2e-16 ***
+sexmale      -0.8432     0.5607  -1.504    0.133    
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+Residual standard error: 8.865 on 998 degrees of freedom
+Multiple R-squared:  0.002261,  Adjusted R-squared:  0.001261 
+F-statistic: 2.261 on 1 and 998 DF,  p-value: 0.133
+
+
+
+
+

Linear model with categorical predictors

+
+
m2 <- lm(height ~ sex, data = trees)
+
+

corresponds to

+

\[ + \begin{aligned} + Height_{i} = a + b_{male} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +\]

+
+
+

Model height ~ sex

+
+
m2 <- lm(height ~ sex, data = trees)
+summary(m2)
+
+

+Call:
+lm(formula = height ~ sex, data = trees)
+
+Residuals:
+     Min       1Q   Median       3Q      Max 
+-22.6881  -6.7881  -0.0097   6.7261  22.3687 
+
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)    
+(Intercept)  36.9312     0.3981  92.778   <2e-16 ***
+sexmale      -0.8432     0.5607  -1.504    0.133    
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+Residual standard error: 8.865 on 998 degrees of freedom
+Multiple R-squared:  0.002261,  Adjusted R-squared:  0.001261 
+F-statistic: 2.261 on 1 and 998 DF,  p-value: 0.133
+
+
+
+
+

Quiz

+

https://pollev.com/franciscorod726

+
+
+

Let’s read the model report…

+
+
report(m2)
+
+
We fitted a linear model (estimated using OLS) to predict height with sex
+(formula: height ~ sex). The model explains a statistically not significant and
+very weak proportion of variance (R2 = 2.26e-03, F(1, 998) = 2.26, p = 0.133,
+adj. R2 = 1.26e-03). The model's intercept, corresponding to sex = female, is
+at 36.93 (95% CI [36.15, 37.71], t(998) = 92.78, p < .001). Within this model:
+
+  - The effect of sex [male] is statistically non-significant and negative (beta
+= -0.84, 95% CI [-1.94, 0.26], t(998) = -1.50, p = 0.133; Std. beta = -0.10,
+95% CI [-0.22, 0.03])
+
+Standardized parameters were obtained by fitting the model on a standardized
+version of the dataset. 95% Confidence Intervals (CIs) and p-values were
+computed using a Wald t-distribution approximation.
+
+
+
+
+

Estimated distribution of the intercept parameter

+

Intercept = Height of females

+
+
parameters(m2)[1,]
+
+
Parameter   | Coefficient |   SE |         95% CI | t(998) |      p
+-------------------------------------------------------------------
+(Intercept) |       36.93 | 0.40 | [36.15, 37.71] |  92.78 | < .001
+
+
+

+Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+  using a Wald t-distribution approximation.
+
+
+
+
plot(simulate_parameters(m2), show_intercept = TRUE)
+
+

+
+
+
+
+

Estimated distribution of the beta parameter

+

beta = height difference of males vs females

+
+
parameters(m2)[2,]
+
+
Parameter  | Coefficient |   SE |        95% CI | t(998) |     p
+----------------------------------------------------------------
+sex [male] |       -0.84 | 0.56 | [-1.94, 0.26] |  -1.50 | 0.133
+
+
+

+Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+  using a Wald t-distribution approximation.
+
+
+
+
plot(simulate_parameters(m2))
+
+

+
+
+
+
+

Analysing differences among factor levels

+
+
estimate_means(m2)
+
+
We selected `at = c("sex")`.
+
+
+
Estimated Marginal Means
+
+sex    |  Mean |   SE |         95% CI
+--------------------------------------
+male   | 36.09 | 0.39 | [35.31, 36.86]
+female | 36.93 | 0.40 | [36.15, 37.71]
+
+Marginal means estimated at sex
+
+
+
+
estimate_contrasts(m2)
+
+
No variable was specified for contrast estimation. Selecting `contrast = "sex"`.
+
+
+
Marginal Contrasts Analysis
+
+Level1 | Level2 | Difference |        95% CI |   SE | t(998) |     p
+--------------------------------------------------------------------
+male   | female |      -0.84 | [-1.94, 0.26] | 0.56 |  -1.50 | 0.133
+
+Marginal contrasts estimated at sex
+p-value adjustment method: Holm (1979)
+
+
+
+
+

Visualising the fitted model

+
+
+

Plot (visreg)

+
+
visreg(m2)
+
+

+
+
+
+
+

Plot (easystats)

+
+
plot(estimate_means(m2))
+
+
We selected `at = c("sex")`.
+
+
+

+
+
+
+
+

Model checking

+
+
+

Model checking: residuals

+
+
hist(resid(m2))
+
+

+
+
+
+
def.par <- par(no.readonly = TRUE)
+layout(matrix(1:4, nrow=2))
+plot(m2)
+
+

+
+
par(def.par)
+
+
+
+

Model checking (easystats)

+
+
check_model(m2)
+
+

+
+
+
+
+
+

Q: Does height differ among field sites?

+
+

Quiz

+

https://pollev.com/franciscorod726

+
+
+

Plot data first

+
+
plot(height ~ site, data = trees)
+
+

+
+
+
+
+

Linear model with categorical predictors

+
+
m3 <- lm(height ~ site, data = trees)
+
+

\[ + \begin{aligned} + y_{i} = a + b_{site2} + c_{site3} + d_{site4} + e_{site5} +...+ \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +\]

+
+
+

Model Height ~ site

+

All right here?

+
+
m3 <- lm(height ~ site, data = trees)
+summary(m3)
+
+

+Call:
+lm(formula = height ~ site, data = trees)
+
+Residuals:
+     Min       1Q   Median       3Q      Max 
+-22.4498  -6.7049   0.0709   6.7537  23.0640 
+
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)    
+(Intercept)  35.4636     0.4730  74.975  < 2e-16 ***
+site          0.3862     0.1413   2.733  0.00639 ** 
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+Residual standard error: 8.842 on 998 degrees of freedom
+Multiple R-squared:  0.007429,  Adjusted R-squared:  0.006435 
+F-statistic:  7.47 on 1 and 998 DF,  p-value: 0.006385
+
+
+
+
+

site is a factor!

+
+
trees$site <- as.factor(trees$site)
+
+
+
+

Model Height ~ site

+
+
m3 <- lm(height ~ site, data = trees)
+summary(m3)
+
+

+Call:
+lm(formula = height ~ site, data = trees)
+
+Residuals:
+     Min       1Q   Median       3Q      Max 
+-20.4416  -6.9004   0.0379   6.3051  19.7584 
+
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)    
+(Intercept)  33.8416     0.4266  79.329  < 2e-16 ***
+site2         6.3411     0.7126   8.899  < 2e-16 ***
+site3         4.9991     0.9828   5.086 4.36e-07 ***
+site4         0.5329     0.9872   0.540  0.58949    
+site5         4.3723     0.9425   4.639 3.97e-06 ***
+site6         4.7601     1.1709   4.065 5.18e-05 ***
+site7        -0.7416     1.8506  -0.401  0.68871    
+site8        -0.6832     2.4753  -0.276  0.78258    
+site9         9.1709     3.0165   3.040  0.00243 ** 
+site10       -0.5816     3.8013  -0.153  0.87843    
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+Residual standard error: 8.446 on 990 degrees of freedom
+Multiple R-squared:  0.1016,    Adjusted R-squared:  0.09344 
+F-statistic: 12.44 on 9 and 990 DF,  p-value: < 2.2e-16
+
+
+
+
+

Estimated parameter distributions

+
+
plot(simulate_parameters(m3))
+
+

+
+
+
+
+

Estimated tree heights for each site

+
+
estimate_means(m3)
+
+
We selected `at = c("site")`.
+
+
+
Estimated Marginal Means
+
+site |  Mean |   SE |         95% CI
+------------------------------------
+1    | 33.84 | 0.43 | [33.00, 34.68]
+2    | 40.18 | 0.57 | [39.06, 41.30]
+3    | 38.84 | 0.89 | [37.10, 40.58]
+4    | 34.37 | 0.89 | [32.63, 36.12]
+5    | 38.21 | 0.84 | [36.56, 39.86]
+6    | 38.60 | 1.09 | [36.46, 40.74]
+7    | 33.10 | 1.80 | [29.57, 36.63]
+8    | 33.16 | 2.44 | [28.37, 37.94]
+9    | 43.01 | 2.99 | [37.15, 48.87]
+10   | 33.26 | 3.78 | [25.85, 40.67]
+
+Marginal means estimated at site
+
+
+
+
+

Plot estimated tree heights for each site

+
+
plot(estimate_means(m3))
+
+
We selected `at = c("site")`.
+
+
+

+
+
+
+
+

Analysing differences among factor levels

+

For finer control see emmeans package

+
+
estimate_contrasts(m3)
+
+
No variable was specified for contrast estimation. Selecting `contrast = "site"`.
+
+
+
Marginal Contrasts Analysis
+
+Level1 | Level2 | Difference |          95% CI |   SE | t(990) |      p
+-----------------------------------------------------------------------
+site1  | site10 |       0.58 | [-11.85, 13.01] | 3.80 |   0.15 | > .999
+site1  |  site2 |      -6.34 | [ -8.67, -4.01] | 0.71 |  -8.90 | < .001
+site1  |  site3 |      -5.00 | [ -8.21, -1.78] | 0.98 |  -5.09 | < .001
+site1  |  site4 |      -0.53 | [ -3.76,  2.70] | 0.99 |  -0.54 | > .999
+site1  |  site5 |      -4.37 | [ -7.45, -1.29] | 0.94 |  -4.64 | < .001
+site1  |  site6 |      -4.76 | [ -8.59, -0.93] | 1.17 |  -4.07 | 0.002 
+site1  |  site7 |       0.74 | [ -5.31,  6.79] | 1.85 |   0.40 | > .999
+site1  |  site8 |       0.68 | [ -7.41,  8.78] | 2.48 |   0.28 | > .999
+site1  |  site9 |      -9.17 | [-19.04,  0.69] | 3.02 |  -3.04 | 0.090 
+site2  | site10 |       6.92 | [ -5.57, 19.42] | 3.82 |   1.81 | > .999
+site2  |  site3 |       1.34 | [ -2.10,  4.79] | 1.05 |   1.27 | > .999
+site2  |  site4 |       5.81 | [  2.35,  9.27] | 1.06 |   5.49 | < .001
+site2  |  site5 |       1.97 | [ -1.35,  5.29] | 1.02 |   1.94 | > .999
+site2  |  site6 |       1.58 | [ -2.44,  5.61] | 1.23 |   1.28 | > .999
+site2  |  site7 |       7.08 | [  0.90, 13.26] | 1.89 |   3.75 | 0.008 
+site2  |  site8 |       7.02 | [ -1.17, 15.21] | 2.50 |   2.81 | 0.169 
+site2  |  site9 |      -2.83 | [-12.77,  7.11] | 3.04 |  -0.93 | > .999
+site3  | site10 |       5.58 | [ -7.11, 18.27] | 3.88 |   1.44 | > .999
+site3  |  site4 |       4.47 | [  0.36,  8.57] | 1.26 |   3.56 | 0.015 
+site3  |  site5 |       0.63 | [ -3.37,  4.62] | 1.22 |   0.51 | > .999
+site3  |  site6 |       0.24 | [ -4.35,  4.83] | 1.40 |   0.17 | > .999
+site3  |  site7 |       5.74 | [ -0.82, 12.30] | 2.01 |   2.86 | 0.151 
+site3  |  site8 |       5.68 | [ -2.80, 14.17] | 2.59 |   2.19 | 0.804 
+site3  |  site9 |      -4.17 | [-14.36,  6.01] | 3.11 |  -1.34 | > .999
+site4  | site10 |       1.11 | [-11.58, 13.81] | 3.88 |   0.29 | > .999
+site4  |  site5 |      -3.84 | [ -7.84,  0.16] | 1.22 |  -3.14 | 0.067 
+site4  |  site6 |      -4.23 | [ -8.83,  0.38] | 1.41 |  -3.00 | 0.099 
+site4  |  site7 |       1.27 | [ -5.30,  7.84] | 2.01 |   0.63 | > .999
+site4  |  site8 |       1.22 | [ -7.27,  9.70] | 2.60 |   0.47 | > .999
+site4  |  site9 |      -8.64 | [-18.83,  1.55] | 3.12 |  -2.77 | 0.182 
+site5  | site10 |       4.95 | [ -7.70, 17.61] | 3.87 |   1.28 | > .999
+site5  |  site6 |      -0.39 | [ -4.89,  4.11] | 1.38 |  -0.28 | > .999
+site5  |  site7 |       5.11 | [ -1.39, 11.61] | 1.99 |   2.57 | 0.306 
+site5  |  site8 |       5.06 | [ -3.38, 13.49] | 2.58 |   1.96 | > .999
+site5  |  site9 |      -4.80 | [-14.94,  5.35] | 3.10 |  -1.55 | > .999
+site6  | site10 |       5.34 | [ -7.52, 18.20] | 3.93 |   1.36 | > .999
+site6  |  site7 |       5.50 | [ -1.38, 12.39] | 2.11 |   2.61 | 0.282 
+site6  |  site8 |       5.44 | [ -3.29, 14.18] | 2.67 |   2.04 | > .999
+site6  |  site9 |      -4.41 | [-14.81,  5.99] | 3.18 |  -1.39 | > .999
+site7  | site10 |      -0.16 | [-13.85, 13.53] | 4.18 |  -0.04 | > .999
+site7  |  site8 |      -0.06 | [ -9.97,  9.85] | 3.03 |  -0.02 | > .999
+site7  |  site9 |      -9.91 | [-21.32,  1.49] | 3.49 |  -2.84 | 0.155 
+site8  | site10 |      -0.10 | [-14.80, 14.60] | 4.50 |  -0.02 | > .999
+site8  |  site9 |      -9.85 | [-22.46,  2.75] | 3.86 |  -2.56 | 0.311 
+site9  | site10 |       9.75 | [ -5.99, 25.50] | 4.82 |   2.03 | > .999
+
+Marginal contrasts estimated at site
+p-value adjustment method: Holm (1979)
+
+
+
+
+

Analysing differences among factor levels

+

How different are site 2 and site 9?

+
+
library("marginaleffects")
+
+
+
hypotheses(m3, "site2 = site9")
+
+

+          Term Estimate Std. Error      z Pr(>|z|)   S 2.5 % 97.5 %
+ site2 = site9    -2.83       3.04 -0.931    0.352 1.5 -8.79   3.13
+
+Columns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high 
+
+
+
+
+

Presenting model results

+
+
parameters(m3)
+
+
Parameter   | Coefficient |   SE |         95% CI | t(990) |      p
+-------------------------------------------------------------------
+(Intercept) |       33.84 | 0.43 | [33.00, 34.68] |  79.33 | < .001
+site [2]    |        6.34 | 0.71 | [ 4.94,  7.74] |   8.90 | < .001
+site [3]    |        5.00 | 0.98 | [ 3.07,  6.93] |   5.09 | < .001
+site [4]    |        0.53 | 0.99 | [-1.40,  2.47] |   0.54 | 0.589 
+site [5]    |        4.37 | 0.94 | [ 2.52,  6.22] |   4.64 | < .001
+site [6]    |        4.76 | 1.17 | [ 2.46,  7.06] |   4.07 | < .001
+site [7]    |       -0.74 | 1.85 | [-4.37,  2.89] |  -0.40 | 0.689 
+site [8]    |       -0.68 | 2.48 | [-5.54,  4.17] |  -0.28 | 0.783 
+site [9]    |        9.17 | 3.02 | [ 3.25, 15.09] |   3.04 | 0.002 
+site [10]   |       -0.58 | 3.80 | [-8.04,  6.88] |  -0.15 | 0.878 
+
+
+

+Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+  using a Wald t-distribution approximation.
+
+
+
+
modelsummary(m3, estimate  = "{estimate} ({std.error})", statistic = NULL, 
+             fmt = 1, gof_map = NA, coef_rename = paste0("site", 1:10), output = "html")
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 (1)
site133.8 (0.4)
site26.3 (0.7)
site35.0 (1.0)
site40.5 (1.0)
site54.4 (0.9)
site64.8 (1.2)
site7−0.7 (1.9)
site8−0.7 (2.5)
site99.2 (3.0)
site10−0.6 (3.8)
+ + +
+
+
+
+

Plot (visreg)

+
+
visreg(m3)
+
+

+
+
+
+
+

Plot (easystats)

+
+
plot(estimate_means(m3))
+
+
We selected `at = c("site")`.
+
+
+

+
+
+
+
+

Plot model (modelsummary)

+
+
modelplot(m3)
+
+

+
+
+
+
+

Plot model (easystats)

+
+
plot(parameters(m3), show_intercept = TRUE)
+
+

+
+
+
+
+

Fit model without intercept

+
+
m3bis <- lm(height ~ site - 1, data = trees)
+summary(m3bis)
+
+

+Call:
+lm(formula = height ~ site - 1, data = trees)
+
+Residuals:
+     Min       1Q   Median       3Q      Max 
+-20.4416  -6.9004   0.0379   6.3051  19.7584 
+
+Coefficients:
+       Estimate Std. Error t value Pr(>|t|)    
+site1   33.8416     0.4266  79.329   <2e-16 ***
+site2   40.1826     0.5707  70.404   <2e-16 ***
+site3   38.8407     0.8854  43.868   <2e-16 ***
+site4   34.3744     0.8903  38.610   <2e-16 ***
+site5   38.2139     0.8404  45.469   <2e-16 ***
+site6   38.6017     1.0904  35.401   <2e-16 ***
+site7   33.1000     1.8007  18.381   <2e-16 ***
+site8   33.1583     2.4382  13.599   <2e-16 ***
+site9   43.0125     2.9862  14.404   <2e-16 ***
+site10  33.2600     3.7773   8.805   <2e-16 ***
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+Residual standard error: 8.446 on 990 degrees of freedom
+Multiple R-squared:   0.95, Adjusted R-squared:  0.9495 
+F-statistic:  1879 on 10 and 990 DF,  p-value: < 2.2e-16
+
+
+
+
plot(parameters(m3bis))
+
+

+
+
+
+
+

Model checking: residuals

+
+
def.par <- par(no.readonly = TRUE)
+layout(matrix(1:4, nrow = 2))
+plot(m3)
+
+

+
+
par(def.par)
+
+
+
+

Model checking (easystats)

+
+
check_model(m3)
+
+

+
+
+
+
+
+
+

Combining continuous and categorical predictors

+
+

Predicting tree height based on dbh and site

+
+
lm(height ~ site + dbh, data = trees)
+
+

+Call:
+lm(formula = height ~ site + dbh, data = trees)
+
+Coefficients:
+(Intercept)        site2        site3        site4        site5        site6  
+    16.6990       6.5043       4.3575       1.9347       3.6374       4.2045  
+      site7        site8        site9       site10          dbh  
+    -0.1762      -5.3126       5.4370       2.2633       0.6171  
+
+
+

corresponds to

+

\[ + \begin{aligned} + y_{i} = a + b_{site2} + c_{site3} + d_{site4} + e_{site5} +...+ k \cdot DBH_{i} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +\]

+
+
+

Predicting tree height based on dbh and site

+
+
m4 <- lm(height ~ site + dbh, data = trees)
+summary(m4)
+
+

+Call:
+lm(formula = height ~ site + dbh, data = trees)
+
+Residuals:
+     Min       1Q   Median       3Q      Max 
+-10.1130  -1.9885   0.0582   2.0314  11.3320 
+
+Coefficients:
+             Estimate Std. Error t value Pr(>|t|)    
+(Intercept) 16.699037   0.260565  64.088  < 2e-16 ***
+site2        6.504303   0.256730  25.335  < 2e-16 ***
+site3        4.357457   0.354181  12.303  < 2e-16 ***
+site4        1.934650   0.356102   5.433 6.98e-08 ***
+site5        3.637432   0.339688  10.708  < 2e-16 ***
+site6        4.204511   0.421906   9.966  < 2e-16 ***
+site7       -0.176193   0.666772  -0.264   0.7916    
+site8       -5.312648   0.893603  -5.945 3.82e-09 ***
+site9        5.437049   1.087766   4.998 6.84e-07 ***
+site10       2.263338   1.369986   1.652   0.0988 .  
+dbh          0.617075   0.007574  81.473  < 2e-16 ***
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+Residual standard error: 3.043 on 989 degrees of freedom
+Multiple R-squared:  0.8835,    Adjusted R-squared:  0.8823 
+F-statistic:   750 on 10 and 989 DF,  p-value: < 2.2e-16
+
+
+
+
+

Presenting model results

+
+
parameters(m4)
+
+
Parameter   | Coefficient |       SE |         95% CI | t(989) |      p
+-----------------------------------------------------------------------
+(Intercept) |       16.70 |     0.26 | [16.19, 17.21] |  64.09 | < .001
+site [2]    |        6.50 |     0.26 | [ 6.00,  7.01] |  25.34 | < .001
+site [3]    |        4.36 |     0.35 | [ 3.66,  5.05] |  12.30 | < .001
+site [4]    |        1.93 |     0.36 | [ 1.24,  2.63] |   5.43 | < .001
+site [5]    |        3.64 |     0.34 | [ 2.97,  4.30] |  10.71 | < .001
+site [6]    |        4.20 |     0.42 | [ 3.38,  5.03] |   9.97 | < .001
+site [7]    |       -0.18 |     0.67 | [-1.48,  1.13] |  -0.26 | 0.792 
+site [8]    |       -5.31 |     0.89 | [-7.07, -3.56] |  -5.95 | < .001
+site [9]    |        5.44 |     1.09 | [ 3.30,  7.57] |   5.00 | < .001
+site [10]   |        2.26 |     1.37 | [-0.43,  4.95] |   1.65 | 0.099 
+dbh         |        0.62 | 7.57e-03 | [ 0.60,  0.63] |  81.47 | < .001
+
+
+

+Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+  using a Wald t-distribution approximation.
+
+
+
+
+

Estimated tree heights for each site

+
+
estimate_means(m4)
+
+
We selected `at = c("site")`.
+
+
+
Estimated Marginal Means
+
+site |  Mean |   SE |         95% CI
+------------------------------------
+1    | 33.90 | 0.15 | [33.60, 34.21]
+2    | 40.41 | 0.21 | [40.01, 40.81]
+3    | 38.26 | 0.32 | [37.64, 38.89]
+4    | 35.84 | 0.32 | [35.21, 36.47]
+5    | 37.54 | 0.30 | [36.95, 38.14]
+6    | 38.11 | 0.39 | [37.34, 38.88]
+7    | 33.73 | 0.65 | [32.45, 35.00]
+8    | 28.59 | 0.88 | [26.86, 30.32]
+9    | 39.34 | 1.08 | [37.23, 41.45]
+10   | 36.17 | 1.36 | [33.50, 38.84]
+
+Marginal means estimated at site
+
+
+
+
+

Fit model without intercept

+
+
m4 <- lm(height ~ -1 + site + dbh, data = trees)
+summary(m4)
+
+

+Call:
+lm(formula = height ~ -1 + site + dbh, data = trees)
+
+Residuals:
+     Min       1Q   Median       3Q      Max 
+-10.1130  -1.9885   0.0582   2.0314  11.3320 
+
+Coefficients:
+        Estimate Std. Error t value Pr(>|t|)    
+site1  16.699037   0.260565   64.09   <2e-16 ***
+site2  23.203340   0.292773   79.25   <2e-16 ***
+site3  21.056494   0.386532   54.48   <2e-16 ***
+site4  18.633687   0.374456   49.76   <2e-16 ***
+site5  20.336469   0.373942   54.38   <2e-16 ***
+site6  20.903548   0.448913   46.56   <2e-16 ***
+site7  16.522844   0.679936   24.30   <2e-16 ***
+site8  11.386389   0.918198   12.40   <2e-16 ***
+site9  22.136086   1.105970   20.02   <2e-16 ***
+site10 18.962375   1.372158   13.82   <2e-16 ***
+dbh     0.617075   0.007574   81.47   <2e-16 ***
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+Residual standard error: 3.043 on 989 degrees of freedom
+Multiple R-squared:  0.9935,    Adjusted R-squared:  0.9934 
+F-statistic: 1.377e+04 on 11 and 989 DF,  p-value: < 2.2e-16
+
+
+
+
+

Plot (visreg)

+
+
visreg(m4)
+
+

+
+
+

+
+
+
+
visreg(m4, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE)
+
+

+
+
+
+
+

Plot model (easystats)

+
+
plot(parameters(m4))
+
+

+
+
+

Keeping sites only, dropping “dbh”

+
+
plot(parameters(m4, drop = "dbh"))
+
+

+
+
+
+
+

Plot model (modelsummary)

+
+
modelplot(m4)
+
+

+
+
+

Keeping sites only, dropping “dbh”

+
+
modelplot(m4, coef_omit = "dbh")
+
+

+
+
+
+
+

What happened to site 8?

+
+
visreg(m3)
+
+

+
+
+
+
visreg(m4, xvar = "site")
+
+

+
+
+

site 8 has the largest diameters:

+
+
boxplot(dbh ~ site, data = trees)
+
+

+
+
+

DBH

+
+
aggregate(trees$dbh ~ trees$site, FUN = mean)
+
+
   trees$site trees$dbh
+1           1  27.78033
+2           2  27.51580
+3           3  28.82011
+4           4  25.50867
+5           5  28.97119
+6           6  28.68067
+7           7  26.86409
+8           8  35.28250
+9           9  33.83125
+10         10  23.17000
+
+
+

HEIGHT

+
+
aggregate(trees$height ~ trees$site, FUN = mean)
+
+
   trees$site trees$height
+1           1     33.84158
+2           2     40.18265
+3           3     38.84066
+4           4     34.37444
+5           5     38.21386
+6           6     38.60167
+7           7     33.10000
+8           8     33.15833
+9           9     43.01250
+10         10     33.26000
+
+
+
+
+

We have fitted a model with many intercepts and a single slope

+
+
visreg(m4, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE)
+
+

+
+
+
+
+

Slope is the same for all sites

+
+
parameters(m4, keep = "dbh")
+
+
Parameter | Coefficient |       SE |       95% CI | t(989) |      p
+-------------------------------------------------------------------
+dbh       |        0.62 | 7.57e-03 | [0.60, 0.63] |  81.47 | < .001
+
+
+

+Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+  using a Wald t-distribution approximation.
+
+
+
+
+

Model checking: residuals

+
+
def.par <- par(no.readonly = TRUE)
+layout(matrix(1:4, nrow=2))
+plot(m4)
+
+

+
+
par(def.par)
+
+
+
+

Model checking with easystats

+
+
check_model(m4)
+
+

+
+
+
+
+

How good is this model? Calibration plot

+
+
trees$height.pred <- fitted(m4)
+plot(trees$height.pred, trees$height, 
+     xlab = "Tree height (predicted)", 
+     ylab = "Tree height (observed)", 
+     las = 1, xlim = c(10,60), ylim = c(10,60))
+abline(a = 0, b = 1)
+
+

+
+
+
+
+

How good is this model? Calibration plot (easystats)

+
+
pred <- estimate_expectation(m4)
+pred$obs <- trees$height
+plot(obs ~ Predicted, data = pred, xlim = c(15, 60), ylim = c(15, 60))
+abline(a = 0, b = 1)
+
+

+
+
+
+
+

Posterior predictive checking

+

Simulating response data from fitted model (yrep)

+

and comparing with observed response (y)

+
+
check_predictions(m4)
+
+

+
+
+
+
+

Predicting heights of new trees (easystats)

+
+

Using model for prediction

+

Expected height of 10-cm diameter tree in each site?

+
+
trees.10cm <- data.frame(site = as.factor(1:10),
+                        dbh = 10)
+trees.10cm
+
+
   site dbh
+1     1  10
+2     2  10
+3     3  10
+4     4  10
+5     5  10
+6     6  10
+7     7  10
+8     8  10
+9     9  10
+10   10  10
+
+
+
+
+

Using model for prediction

+

Expected height of 10-cm DBH trees at each site

+
+
pred <- estimate_expectation(m4, data = trees.10cm)
+pred
+
+
Model-based Expectation
+
+site |   dbh | Predicted |   SE |         95% CI
+------------------------------------------------
+1    | 10.00 |     22.87 | 0.20 | [22.47, 23.27]
+2    | 10.00 |     29.37 | 0.24 | [28.89, 29.85]
+3    | 10.00 |     27.23 | 0.35 | [26.54, 27.91]
+4    | 10.00 |     24.80 | 0.34 | [24.13, 25.47]
+5    | 10.00 |     26.51 | 0.34 | [25.85, 27.16]
+6    | 10.00 |     27.07 | 0.42 | [26.25, 27.89]
+7    | 10.00 |     22.69 | 0.66 | [21.40, 23.99]
+8    | 10.00 |     17.56 | 0.90 | [15.79, 19.32]
+9    | 10.00 |     28.31 | 1.09 | [26.17, 30.45]
+10   | 10.00 |     25.13 | 1.36 | [22.46, 27.81]
+
+Variable predicted: height
+
+
+
+
+

Using model for prediction

+

Prediction intervals (accounting for residual variance)

+
+
pred <- estimate_prediction(m4, data = trees.10cm)
+pred
+
+
Model-based Prediction
+
+site |   dbh | Predicted |   SE |         95% CI
+------------------------------------------------
+1    | 10.00 |     22.87 | 3.05 | [16.88, 28.85]
+2    | 10.00 |     29.37 | 3.05 | [23.38, 35.36]
+3    | 10.00 |     27.23 | 3.06 | [21.22, 33.24]
+4    | 10.00 |     24.80 | 3.06 | [18.80, 30.81]
+5    | 10.00 |     26.51 | 3.06 | [20.50, 32.51]
+6    | 10.00 |     27.07 | 3.07 | [21.05, 33.10]
+7    | 10.00 |     22.69 | 3.11 | [16.58, 28.80]
+8    | 10.00 |     17.56 | 3.17 | [11.33, 23.78]
+9    | 10.00 |     28.31 | 3.23 | [21.96, 34.65]
+10   | 10.00 |     25.13 | 3.33 | [18.59, 31.68]
+
+Variable predicted: height
+
+
+
+
+
+
+

Q: Does the allometric relationship between Height and Diameter vary among sites?

+
+
df <- data.frame(dbh = seq(10, 50, by = 1), 
+                 height = seq(20, 60, by = 1))
+
+plot(height ~ dbh, data = df, type = "n")
+abline(a = 25, 0.6)
+abline(a = 40, b = 0.1, col = "steelblue")
+abline(a = 50, b = -0.3, col = "orangered")
+
+

+
+
+
+

Model with interactions

+
+
m5 <- lm(height ~ site*dbh, data = trees)
+summary(m5)
+
+

+Call:
+lm(formula = height ~ site * dbh, data = trees)
+
+Residuals:
+     Min       1Q   Median       3Q      Max 
+-10.1017  -1.9839   0.0645   2.0486  11.1789 
+
+Coefficients:
+             Estimate Std. Error t value Pr(>|t|)    
+(Intercept) 16.359437   0.360054  45.436  < 2e-16 ***
+site2        7.684781   0.609657  12.605  < 2e-16 ***
+site3        4.518568   0.867008   5.212 2.28e-07 ***
+site4        2.769336   0.813259   3.405 0.000688 ***
+site5        3.917607   0.870983   4.498 7.68e-06 ***
+site6        4.155161   1.009379   4.117 4.17e-05 ***
+site7       -2.306799   1.551303  -1.487 0.137334    
+site8       -2.616095   4.090671  -0.640 0.522630    
+site9        2.621560   5.073794   0.517 0.605492    
+site10       4.662340   2.991072   1.559 0.119378    
+dbh          0.629299   0.011722  53.685  < 2e-16 ***
+site2:dbh   -0.042784   0.020033  -2.136 0.032950 *  
+site3:dbh   -0.006031   0.027640  -0.218 0.827312    
+site4:dbh   -0.031633   0.028225  -1.121 0.262677    
+site5:dbh   -0.010173   0.027887  -0.365 0.715334    
+site6:dbh    0.001337   0.032109   0.042 0.966797    
+site7:dbh    0.079728   0.052056   1.532 0.125951    
+site8:dbh   -0.079027   0.113386  -0.697 0.485984    
+site9:dbh    0.081035   0.146649   0.553 0.580679    
+site10:dbh  -0.101107   0.114520  -0.883 0.377522    
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+Residual standard error: 3.041 on 980 degrees of freedom
+Multiple R-squared:  0.8847,    Adjusted R-squared:  0.8825 
+F-statistic: 395.7 on 19 and 980 DF,  p-value: < 2.2e-16
+
+
+
+
+

Does slope vary among sites?

+
+
visreg(m5, xvar = "dbh", by = "site")
+
+

+
+
+
+
visreg(m5, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE)
+
+

+
+
+

END

+
+
+ +
+ + +
+ + + + \ No newline at end of file diff --git a/docs/lm.qmd b/docs/lm.qmd new file mode 100644 index 0000000..841b60d --- /dev/null +++ b/docs/lm.qmd @@ -0,0 +1,1024 @@ +--- +title: "Linear models" +author: "Francisco Rodríguez-Sánchez" +engine: knitr +format: html +toc: true +filters: + - webr +webr: + packages: ['visreg', 'easystats', 'patchwork', 'emmeans', 'modelsummary', 'marginaleffects'] + autoload-packages: true + channel-type: "post-message" +--- + +# A simple linear model + + +### Example dataset: forest trees + +- Download [this dataset](https://raw.githubusercontent.com/Pakillo/LM-GLM-GLMM-intro/trees/data/trees.csv) + +```{r} +download.file("https://raw.githubusercontent.com/Pakillo/LM-GLM-GLMM-intro/trees/data/trees.csv", + destfile = "trees.csv", mode = "wb") +``` + +- Import: + +```{r} +trees <- read.csv("trees.csv") +head(trees) +``` + + +### Questions + +- What is the relationship between DBH and height? + +- Do taller trees have bigger trunks? + +- Can we predict height from DBH? How well? + + + +## Plot your data first! + +### Exploratory Data Analysis (EDA) + +Outliers + +```{r} +plot(trees$height) +``` + + +### Histogram of response variable + +```{r} +hist(trees$height) +``` + + +### Histogram of predictor variable + +```{r} +hist(trees$dbh) +``` + +### Scatterplot + +```{r} +plot(height ~ dbh, data = trees, las = 1) +``` + + + +## Model fitting + +### Now fit model + +Hint: `lm` + +```{r} +m1 <- lm(height ~ dbh, data = trees) +``` + +which corresponds to + +$$ + \begin{aligned} + Height_{i} = a + b \cdot DBH_{i} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +$$ + + +### Package `equatiomatic` returns model structure + +```{r} +#| eval: false +library("equatiomatic") +m1 <- lm(height ~ dbh, data = trees) +equatiomatic::extract_eq(m1) +``` + +```{r} +#| eval: false +equatiomatic::extract_eq(m1, use_coefs = TRUE) +``` + + + +## Model interpretation + + +### What does this mean? 
+ +```{r} +summary(m1) +``` + + +### Estimated distribution of the **intercept** parameter + +```{r} +library("easystats") +``` + +```{r} +parameters(m1)[1,] +``` + +```{r} +plot(simulate_parameters(m1), show_intercept = TRUE) +``` + + +### Estimated distribution of the **slope** parameter + +```{r} +parameters::parameters(m1)[2,] +``` + +```{r} +plot(simulate_parameters(m1)) +``` + +### Distribution of residuals + +```{r} +hist(residuals(m1)) +``` + + +### Degrees of freedom + +DF = n - p + +n = sample size + +p = number of estimated parameters + + + + +### R-squared + +Proportion of 'explained' variance + +$R^{2} = 1 - \frac{\text{Residual Variation}}{\text{Total Variation}}$ + + + +### Adjusted R-squared + +Accounts for model complexity + +(number of parameters) + +$R^2_{adj} = 1 - (1 - R^2) \frac{n - 1}{n - p - 1}$ + + +### Quiz + +https://pollev.com/franciscorod726 + + + +### Retrieving model coefficients + +```{r} +coef(m1) +``` + +### Confidence intervals for parameters + +```{r} +confint(m1) +``` + + +### Retrieving model parameters (easystats) + +```{r} +parameters(m1) +``` + + + + + +## Communicating results + +### Avoid dichotomania of statistical significance + +- "Never conclude there is **‘no difference’** or ‘no association’ just because **p > 0.05 or CI includes zero**" + +- Estimate and communicate **effect sizes and their uncertainty** + +- https://doi.org/10.1038/d41586-019-00857-9 + + +### Communicating results + +- We found a **significant relationship** between DBH and Height **(p<0.05)**. + +- We found a {*significant*} **positive** relationship between DBH and Height {*(p<0.05)*} **(b = 0.61, SE = 0.01)**. + +- (add p-value if you wish) + + +### Models that describe themselves (easystats) + +```{r} +report(m1) +``` + + + + + +### Generating table with model results: `modelsummary` + +```{r} +library("modelsummary") +``` + +```{r} +modelsummary(m1, output = "html") ## Word, PDF, PowerPoint, png... 
+``` + + + + +### Generating table with model results: `modelsummary` + +```{r} +modelsummary(m1, fmt = 2, + estimate = "{estimate} ({std.error})", + statistic = NULL, + gof_map = c("nobs", "r.squared", "rmse"), + output = "html") +``` + + + + +## Visualising fitted model + +### Plot model: `visreg` + +```{r} +library("visreg") +``` + +```{r} +visreg(m1) +``` + + +`visreg` can use ggplot2 too + +```{r} +#| eval: false +visreg(m1, gg = TRUE) + theme_bw() +``` + + + + +### Plot (easystats) + +```{r} +plot(estimate_expectation(m1)) +``` + + +### Plot (modelsummary) + +```{r} +modelplot(m1) +``` + + +### Plot model parameters with easystats (`see` package) + +```{r} +plot(parameters(m1), show_intercept = TRUE, show_labels = TRUE) +``` + + +### Plot parameters' estimated distribution + +```{r} +plot(simulate_parameters(m1)) +``` + + + +## Model checking + + +### Linear model assumptions + +- **Linearity** (transformations, GAM...) + +- **Residuals**: + - Independent + - Equal variance + - Normal + +- Negligible **measurement error** in predictors + + + + +### Are residuals normal? + +```{r} +hist(residuals(m1)) +``` + +SD = 4.09 + + +### Model checking: `plot(model)` + +```{r} +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow=2)) +plot(m1) +par(def.par) +``` + + +### Model checking with `performance` (easystats) + +```{r} +check_model(m1) +``` + + + + + +### A dashboard to explore the full model + +```{r} +#| eval: false +model_dashboard(m1) +``` + + + + +## Making predictions with easystats + +### Estimate expected values + +```{r} +pred <- estimate_expectation(m1) +head(pred) +``` + + +### Expected values given DBH + +```{r} +plot(estimate_expectation(m1)) +``` + + + +### Calibration plot: observed vs predicted + +```{r} +pred$height.obs <- trees$height +plot(height.obs ~ Predicted, data = pred, xlim = c(15, 60), ylim = c(15, 60)) +abline(a = 0, b = 1) +``` + + +### Estimate prediction interval + +Accounting for residual variation! 
+ +```{r} +pred <- estimate_prediction(m1) +head(pred) +``` + +### Confidence vs Prediction interval + +```{r} +plot(estimate_expectation(m1)) +``` + +```{r} +plot(estimate_prediction(m1)) +``` + + +### Make predictions for new data + +```{r} +estimate_expectation(m1, data = data.frame(dbh = 39)) +``` + +```{r} +estimate_prediction(m1, data = data.frame(dbh = 39)) +``` + + + + +## Workflow + +- **Visualise data** + +- **Understand fitted model** (`summary`) + +- **Visualise model** (`visreg`...) + +- **Check model** (`plot`, `check_model`, calibration plot...) + +- **Predict** (`predict`, `estimate_expectation`, `estimate_prediction`) + + + + + + +# Categorical predictors (factors) + + +## Q: Does tree height vary with sex? + +```{r} +boxplot(height ~ sex, data = trees) +``` + + + +### Model height ~ sex + +```{r} +m2 <- lm(height ~ sex, data = trees) +summary(m2) +``` + + + +### Linear model with categorical predictors + +```{r} +m2 <- lm(height ~ sex, data = trees) +``` + +corresponds to + +$$ + \begin{aligned} + Height_{i} = a + b_{male} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +$$ + + + +### Model height ~ sex + +```{r} +m2 <- lm(height ~ sex, data = trees) +summary(m2) +``` + + +### Quiz + +https://pollev.com/franciscorod726 + + +### Let's read the model report... 
+ +```{r} +report(m2) +``` + + + +### Estimated distribution of the **intercept** parameter + +**Intercept = Height of females** + +```{r} +parameters(m2)[1,] +``` + +```{r} +plot(simulate_parameters(m2), show_intercept = TRUE) +``` + + +### Estimated distribution of the *beta* parameter + +*beta* = **height difference** of males vs females + +```{r} +parameters(m2)[2,] +``` + +```{r} +plot(simulate_parameters(m2)) +``` + + + + + +### Analysing differences among factor levels + +```{r} +estimate_means(m2) +``` + +```{r} +estimate_contrasts(m2) +``` + + +### Visualising the fitted model + +### Plot (visreg) + +```{r} +visreg(m2) +``` + + +### Plot (easystats) + +```{r} +plot(estimate_means(m2)) +``` + + + +### Model checking + +### Model checking: residuals + +```{r} +hist(resid(m2)) +``` + +```{r} +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow=2)) +plot(m2) +par(def.par) +``` + + +### Model checking (easystats) + +```{r} +check_model(m2) +``` + + + + +## Q: Does height differ among field sites? + +### Quiz + +https://pollev.com/franciscorod726 + +### Plot data first + +```{r} +plot(height ~ site, data = trees) +``` + + +### Linear model with categorical predictors + +```{r} +m3 <- lm(height ~ site, data = trees) +``` + + +$$ + \begin{aligned} + y_{i} = a + b_{site2} + c_{site3} + d_{site4} + e_{site5} +...+ \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +$$ + + + + + +### Model Height ~ site + +**All right here?** + +```{r} +m3 <- lm(height ~ site, data = trees) +summary(m3) +``` + + +### site is a factor! 
+ +```{r} +trees$site <- as.factor(trees$site) +``` + + +### Model Height ~ site + +```{r} +m3 <- lm(height ~ site, data = trees) +summary(m3) +``` + + +### Estimated parameter distributions + +```{r} +plot(simulate_parameters(m3)) +``` + + + + +### Estimated tree heights for each site + +```{r} +estimate_means(m3) +``` + + +### Plot estimated tree heights for each site + +```{r} +plot(estimate_means(m3)) +``` + +### Analysing differences among factor levels + +For finer control see `emmeans` package + +```{r} +estimate_contrasts(m3) +``` + + +### Analysing differences among factor levels + +How different are site 2 and site 9? + +```{r} +library("marginaleffects") +``` + +```{r} +hypotheses(m3, "site2 = site9") +``` + + + +### Presenting model results + +```{r} +parameters(m3) +``` + + +```{r} +modelsummary(m3, estimate = "{estimate} ({std.error})", statistic = NULL, + fmt = 1, gof_map = NA, coef_rename = paste0("site", 1:10), output = "html") +``` + + + +### Plot (visreg) + +```{r} +visreg(m3) +``` + +### Plot (easystats) + +```{r} +plot(estimate_means(m3)) +``` + + + +### Plot model (modelsummary) + +```{r} +modelplot(m3) +``` + + +### Plot model (easystats) + +```{r} +plot(parameters(m3), show_intercept = TRUE) +``` + + +### Fit model without intercept + +```{r} +m3bis <- lm(height ~ site - 1, data = trees) +summary(m3bis) +``` + +```{r} +plot(parameters(m3bis)) +``` + +### Model checking: residuals + +```{r} +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow = 2)) +plot(m3) +par(def.par) +``` + + +### Model checking: residuals + +```{r} +check_model(m3) +``` + + + + + +# Combining continuous and categorical predictors + + +### Predicting tree height based on dbh and site + +```{r} +lm(height ~ site + dbh, data = trees) +``` + +corresponds to + +$$ + \begin{aligned} + y_{i} = a + b_{site2} + c_{site3} + d_{site4} + e_{site5} +...+ k \cdot DBH_{i} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +$$ + + +### 
Predicting tree height based on dbh and site + +```{r} +m4 <- lm(height ~ site + dbh, data = trees) +summary(m4) +``` + + +### Presenting model results + +```{r} +parameters(m4) +``` + + + +### Estimated tree heights for each site + +```{r} +estimate_means(m4) +``` + + +### Fit model without intercept + +```{r} +m4 <- lm(height ~ -1 + site + dbh, data = trees) +summary(m4) +``` + + +### Plot (visreg) + +```{r} +visreg(m4) +``` + +```{r} +visreg(m4, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE) +``` + + + +### Plot model (easystats) + +```{r} +plot(parameters(m4)) +``` + +Keeping sites only, dropping "dbh" + +```{r} +plot(parameters(m4, drop = "dbh")) +``` + + +### Plot model (modelsummary) + +```{r} +modelplot(m4) +``` + +Keeping sites only, dropping "dbh" + +```{r} +modelplot(m4, coef_omit = "dbh") +``` + + +### What happened to site 8? + +```{r} +visreg(m3) +``` + +```{r} +visreg(m4, xvar = "site") +``` + +site 8 has the largest diameters: + +```{r} +boxplot(dbh ~ site, data = trees) +``` + +**DBH** + +```{r} +aggregate(trees$dbh ~ trees$site, FUN = mean) +``` + +**HEIGHT** + +```{r} +aggregate(trees$height ~ trees$site, FUN = mean) +``` + + + + +### We have fitted model w/ many intercepts and single slope + +```{r} +visreg(m4, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE) +``` + + + + +### Slope is the same for all sites + +```{r} +parameters(m4, keep = "dbh") +``` + + + +### Model checking: residuals + +```{r} +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow=2)) +plot(m4) +par(def.par) +``` + + +### Model checking with easystats + +```{r} +check_model(m4) +``` + + + +### How good is this model? Calibration plot + +```{r} +trees$height.pred <- fitted(m4) +plot(trees$height.pred, trees$height, + xlab = "Tree height (predicted)", + ylab = "Tree height (observed)", + las = 1, xlim = c(10,60), ylim = c(10,60)) +abline(a = 0, b = 1) +``` + + +### How good is this model? 
Calibration plot (easystats) + +```{r} +pred <- estimate_expectation(m4) +pred$obs <- trees$height +plot(obs ~ Predicted, data = pred, xlim = c(15, 60), ylim = c(15, 60)) +abline(a = 0, b = 1) +``` + + +### *Posterior* predictive checking + +Simulating response data from fitted model (`yrep`) + +and comparing with observed response (`y`) + +```{r} +check_predictions(m4) +``` + + + + +## Predicting heights of new trees (easystats) + +### Using model for prediction + +Expected height of 10-cm diameter tree in each site? + +```{r} +trees.10cm <- data.frame(site = as.factor(1:10), + dbh = 10) +trees.10cm +``` + + +### Using model for prediction + +Expected height of 10-cm DBH trees at each site + +```{r} +pred <- estimate_expectation(m4, data = trees.10cm) +pred +``` + +### Using model for prediction + +Prediction intervals (accounting for residual variance) + +```{r} +pred <- estimate_prediction(m4, data = trees.10cm) +pred +``` + + + +# Q: Does allometric relationship between Height and Diameter vary among sites? + +```{r} +df <- data.frame(dbh = seq(10, 50, by = 1), + height = seq(20, 60, by = 1)) + +plot(height ~ dbh, data = df, type = "n") +abline(a = 25, 0.6) +abline(a = 40, b = 0.1, col = "steelblue") +abline(a = 50, b = -0.3, col = "orangered") +``` + + + +### Model with interactions + +```{r} +m5 <- lm(height ~ site*dbh, data = trees) +summary(m5) +``` + + + +### Does slope vary among sites? 
+ +```{r} +visreg(m5, xvar = "dbh", by = "site") +``` + +```{r} +visreg(m5, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE) +``` + + +END + diff --git a/docs/lm_webR.html b/docs/lm_webR.html index bc2fa54..dcf4622 100644 --- a/docs/lm_webR.html +++ b/docs/lm_webR.html @@ -21,6 +21,40 @@ margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ vertical-align: middle; } +/* CSS for syntax highlighting */ +pre > code.sourceCode { white-space: pre; position: relative; } +pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } +pre > code.sourceCode > span:empty { height: 1.2em; } +.sourceCode { overflow: visible; } +code.sourceCode > span { color: inherit; text-decoration: inherit; } +div.sourceCode { margin: 1em 0; } +pre.sourceCode { margin: 0; } +@media screen { +div.sourceCode { overflow: auto; } +} +@media print { +pre > code.sourceCode { white-space: pre-wrap; } +pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } +} +pre.numberSource code + { counter-reset: source-line 0; } +pre.numberSource code > span + { position: relative; left: -4em; counter-increment: source-line; } +pre.numberSource code > span > a:first-child::before + { content: counter(source-line); + position: relative; left: -1em; text-align: right; vertical-align: baseline; + border: none; display: inline-block; + -webkit-touch-callout: none; -webkit-user-select: none; + -khtml-user-select: none; -moz-user-select: none; + -ms-user-select: none; user-select: none; + padding: 0 4px; width: 4em; + } +pre.numberSource { margin-left: 3em; padding-left: 4px; } +div.sourceCode + { } +@media screen { +pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } +} @@ -149,10 +183,10 @@ const initializeWebRTimerStart = performance.now(); // Determine if we need to install R packages - var installRPackagesList = ['visreg']; + var installRPackagesList = ['visreg', 'easystats', 'patchwork', 
'emmeans', 'modelsummary', 'marginaleffects']; // Check to see if we have an empty array, if we do set to skip the installation. var setupRPackages = !(installRPackagesList.indexOf("") !== -1); - var autoloadRPackages = false; + var autoloadRPackages = true; // Display a startup message? var showStartupMessage = true; @@ -330,13 +364,159 @@ + + - +
- +
- -
-

Questions

    -
  • What is the relationship between DBH and height?
  • +
  • Import:
-
    -
  • Do taller trees have bigger trunks?
  • -
-
    -
  • Can we predict height from DBH? How well?
  • -
-
-
-

Always plot your data first!

-
-

Always plot your data first!

@@ -697,8 +864,8 @@

Always plot let editor; require(['vs/editor/editor.main'], function () { editor = monaco.editor.create(editorDiv, { - value: `library("knitr") -include_graphics("images/anscombe.png")`, + value: `trees <- read.csv("trees.csv") +head(trees)`, language: 'r', theme: 'vs-light', automaticLayout: true, // TODO: Could be problematic for slide decks @@ -954,10 +1121,29117 @@

Always plot };

-
-

Exploratory Data Analysis (EDA)

+
+

Questions

+
    +
  • What is the relationship between DBH and height?

  • +
  • Do taller trees have bigger trunks?

  • +
  • Can we predict height from DBH? How well?

  • +
+
+
+

Plot your data first!

+
+

Exploratory Data Analysis (EDA)

Outliers

-
plot(trees$height)
+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Histogram of response variable

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Histogram of predictor variable

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Scatterplot

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+
+

Model fitting

+
+

Now fit model

+

Hint: lm

+
+ +
+
+
+

+    
+
+
+
+
+ +

which corresponds to

+

\[ + \begin{aligned} + Height_{i} = a + b \cdot DBH_{i} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +\]

+
+
+

Package equatiomatic returns model structure

+
+
library("equatiomatic")
+m1 <- lm(height ~ dbh, data = trees)
+equatiomatic::extract_eq(m1)
+
+
+
equatiomatic::extract_eq(m1, use_coefs = TRUE)
+
+
+
+
+

Model interpretation

+
+

What does this mean?

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Estimated distribution of the intercept parameter

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Estimated distribution of the slope parameter

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Distribution of residuals

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Degrees of freedom

+

DF = n - p

+

n = sample size

+

p = number of estimated parameters

+
+
+

R-squared

+

Proportion of ‘explained’ variance

+

\(R^{2} = 1 - \frac{Residual Variation}{Total Variation}\)

+
+
+

Adjusted R-squared

+

Accounts for model complexity

+

(number of parameters)

+

\(R^2_{adj} = 1 - (1 - R^2) \frac{n - 1}{n - p - 1}\)

+
+
+

Quiz

+

https://pollev.com/franciscorod726

+
+
+

Retrieving model coefficients

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Confidence intervals for parameters

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Retrieving model parameters (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +

https://easystats.github.io/parameters/

+
+
+
+

Communicating results

+
+

Avoid dichotomania of statistical significance

+
    +
  • “Never conclude there is ‘no difference’ or ‘no association’ just because p > 0.05 or CI includes zero”

  • +
  • Estimate and communicate effect sizes and their uncertainty

  • +
  • https://doi.org/10.1038/d41586-019-00857-9

  • +
+
+
+

Communicating results

+
    +
  • We found a significant relationship between DBH and Height (p<0.05).

  • +
  • We found a {significant} positive relationship between DBH and Height {(p<0.05)} (b = 0.61, SE = 0.01).

  • +
  • (add p-value if you wish)

  • +
+
+
+

Models that describe themselves (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +

https://easystats.github.io/report/

+
+
+

Generating table with model results: modelsummary

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +

https://modelsummary.com/

+
+
+

Generating table with model results: modelsummary

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+
+

Visualising fitted model

+
+

Plot model: visreg

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +

visreg can use ggplot2 too

+
+
visreg(m1, gg = TRUE) + theme_bw()
+
+

https://pbreheny.github.io/visreg

+
+
+

Plot (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot (modelsummary)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot model parameters with easystats (see package)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot parameters’ estimated distribution

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+
+

Model checking

+
+

Linear model assumptions

+
    +
  • Linearity (transformations, GAM…)

  • +
  • Residuals:

    +
      +
    • Independent
    • +
    • Equal variance
    • +
    • Normal
    • +
  • +
  • Negligible measurement error in predictors

  • +
+
+
+

Are residuals normal?

+
+ +
+
+
+

+    
+
+
+
+
+ +

SD = 4.09

+
+
+

Model checking: plot(model)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Model checking with performance (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +

https://easystats.github.io/performance/articles/check_model.html

+
+
+

A dashboard to explore the full model

+
+
model_dashboard(m1)
+
+
+
+
+

Making predictions with easystats

+
+

Estimate expected values

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Expected values given DBH

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Calibration plot: observed vs predicted

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Estimate prediction interval

+

Accounting for residual variation!

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Confidence vs Prediction interval

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Make predictions for new data

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+
+

Workflow

+
    +
  • Visualise data

  • +
  • Understand fitted model (summary)

  • +
  • Visualise model (visreg…)

  • +
  • Check model (plot, check_model, calibration plot…)

  • +
  • Predict (predict, estimate_expectation, estimate_prediction)

  • +
+
+
+
+

Categorical predictors (factors)

+
+

Q: Does tree height vary with sex?

+
+ +
+
+
+

+    
+
+
+
+
+ +
+

Model height ~ sex

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Linear model with categorical predictors

+
+ +
+
+
+

+    
+
+
+
+
+ +

corresponds to

+

\[ + \begin{aligned} + Height_{i} = a + b_{male} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +\]

+
+
+

Model height ~ sex

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Quiz

+

https://pollev.com/franciscorod726

+
+
+

Let’s read the model report…

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Estimated distribution of the intercept parameter

+

Intercept = Height of females

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Estimated distribution of the beta parameter

+

beta = height difference of males vs females

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Analysing differences among factor levels

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Visualising the fitted model

+
+
+

Plot (visreg)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Model checking

+
+
+

Model checking: residuals

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Model checking (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+
+

Q: Does height differ among field sites?

+
+

Quiz

+

https://pollev.com/franciscorod726

+
+
+

Plot data first

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Linear model with categorical predictors

+
+ +
+
+
+

+    
+
+
+
+
+ +

\[ + \begin{aligned} + y_{i} = a + b_{site2} + c_{site3} + d_{site4} + e_{site5} +...+ \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +\]

+
+
+

Model Height ~ site

+

All right here?

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

site is a factor!

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Model Height ~ site

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Estimated parameter distributions

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Estimated tree heights for each site

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot estimated tree heights for each site

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Analysing differences among factor levels

+

For finer control see emmeans package

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Analysing differences among factor levels

+

How different are site 2 and site 9?

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Presenting model results

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot (visreg)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot model (modelsummary)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot model (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Fit model without intercept

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Model checking: residuals

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Model checking: residuals

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+
+
+

Combining continuous and categorical predictors

+
+

Predicting tree height based on dbh and site

+
+ +
+
+
+

+    
+
+
+
+
+ +

corresponds to

+

\[ + \begin{aligned} + y_{i} = a + b_{site2} + c_{site3} + d_{site4} + e_{site5} +...+ k \cdot DBH_{i} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +\]

+
+
+

Predicting tree height based on dbh and site

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Presenting model results

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Estimated tree heights for each site

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Fit model without intercept

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot (visreg)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot model (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +

Keeping sites only, dropping “dbh”

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Plot model (modelsummary)

+
+ +
+
+
+

+    
+
+
+
+
+ +

Keeping sites only, dropping “dbh”

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

What happened to site 8?

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +

site 8 has the largest diameters:

+
+ +
+
+
+

+    
+
+
+
+
+ +

DBH

+
+ +
+
+
+

+    
+
+
+
+
+ +

HEIGHT

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

We have fitted model w/ many intercepts and single slope

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Slope is the same for all sites

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Model checking: residuals

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Model checking with easystats

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

How good is this model? Calibration plot

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

How good is this model? Calibration plot (easystats)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Posterior predictive checking

+

Simulating response data from fitted model (yrep)

+

and comparing with observed response (y)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Predicting heights of new trees (easystats)

+
+

Using model for prediction

+

Expected height of 10-cm diameter tree in each site?

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Using model for prediction

+

Expected height of 10-cm DBH trees at each site

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Using model for prediction

+

Prediction intervals (accounting for residual variance)

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+
+
+

Q: Does allometric relationship between Height and Diameter vary among sites?

+
+ +
+
+
+

+    
+
+
+
+
+ +
+

Model with interactions

+
+ +
+
+
+

+    
+
+
+
+
+ +
+
+

Does slope vary among sites?

+
+ +
+
+
+

+    
+
+
+
+
+ +
+ +
+
+
+

+    
+
+
+
+
+ +

END

diff --git a/docs/lm_webR.qmd b/docs/lm_webR.qmd index 838d6ef..7144cc0 100644 --- a/docs/lm_webR.qmd +++ b/docs/lm_webR.qmd @@ -3,59 +3,1022 @@ title: "Linear models" author: "Francisco Rodríguez-Sánchez" engine: knitr format: html +toc: true filters: - webr webr: - packages: ['visreg'] - autoload-packages: false + packages: ['visreg', 'easystats', 'patchwork', 'emmeans', 'modelsummary', 'marginaleffects'] + autoload-packages: true channel-type: "post-message" --- -## Example dataset: forest trees +# A simple linear model -- Download [this dataset](https://raw.githubusercontent.com/Pakillo/LM-GLM-GLMM-intro/trees/data/trees.csv) (or the entire [zip file](https://github.com/Pakillo/LM-GLM-GLMM-intro/raw/trees/datasets.zip)) + +### Example dataset: forest trees + +- Download [this dataset](https://raw.githubusercontent.com/Pakillo/LM-GLM-GLMM-intro/trees/data/trees.csv) + +```{webr-r} +download.file("https://raw.githubusercontent.com/Pakillo/LM-GLM-GLMM-intro/trees/data/trees.csv", + destfile = "trees.csv", mode = "wb") +``` - Import: ```{webr-r} -trees <- read.csv("data/trees.csv") +trees <- read.csv("trees.csv") head(trees) ``` -## Questions - -\Large +### Questions - What is the relationship between DBH and height? -\vspace{5mm} - - Do taller trees have bigger trunks? -\vspace{5mm} - - Can we predict height from DBH? How well? -# Always plot your data first! +## Plot your data first! +### Exploratory Data Analysis (EDA) -## Always plot your data first! 
+Outliers ```{webr-r} -#| echo: false -library("knitr") -include_graphics("images/anscombe.png") +plot(trees$height) ``` +### Histogram of response variable + +```{webr-r} +hist(trees$height) +``` -## Exploratory Data Analysis (EDA) -Outliers +### Histogram of predictor variable -```{webr-r } -plot(trees$height) +```{webr-r} +hist(trees$dbh) +``` + +### Scatterplot + +```{webr-r} +plot(height ~ dbh, data = trees, las = 1) +``` + + + +## Model fitting + +### Now fit model + +Hint: `lm` + +```{webr-r} +m1 <- lm(height ~ dbh, data = trees) +``` + +which corresponds to + +$$ + \begin{aligned} + Height_{i} = a + b \cdot DBH_{i} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +$$ + + +### Package `equatiomatic` returns model structure + +```{r} +#| eval: false +library("equatiomatic") +m1 <- lm(height ~ dbh, data = trees) +equatiomatic::extract_eq(m1) +``` + +```{r} +#| eval: false +equatiomatic::extract_eq(m1, use_coefs = TRUE) +``` + + + +## Model interpretation + + +### What does this mean? 
+ +```{webr-r} +summary(m1) +``` + + +### Estimated distribution of the **intercept** parameter + +```{webr-r} +library("easystats") +``` + +```{webr-r} +parameters(m1)[1,] +``` + +```{webr-r} +plot(simulate_parameters(m1), show_intercept = TRUE) +``` + + +### Estimated distribution of the **slope** parameter + +```{webr-r} +parameters::parameters(m1)[2,] +``` + +```{webr-r} +plot(simulate_parameters(m1)) +``` + +### Distribution of residuals + +```{webr-r} +hist(residuals(m1)) +``` + + +### Degrees of freedom + +DF = n - p + +n = sample size + +p = number of estimated parameters + + + + +### R-squared + +Proportion of 'explained' variance + +$R^{2} = 1 - \frac{Residual Variation}{Total Variation}$ + + + +### Adjusted R-squared + +Accounts for model complexity + +(number of parameters) + +$R^2_{adj} = 1 - (1 - R^2) \frac{n - 1}{n - p - 1}$ + + +### Quiz + +https://pollev.com/franciscorod726 + + + +### Retrieving model coefficients + +```{webr-r} +coef(m1) +``` + +### Confidence intervals for parameters + +```{webr-r} +confint(m1) +``` + + +### Retrieving model parameters (easystats) + +```{webr-r} +parameters(m1) +``` + + + + + +## Communicating results + +### Avoid dichotomania of statistical significance + +- "Never conclude there is **‘no difference’** or ‘no association’ just because **p > 0.05 or CI includes zero**" + +- Estimate and communicate **effect sizes and their uncertainty** + +- https://doi.org/10.1038/d41586-019-00857-9 + + +### Communicating results + +- We found a **significant relationship** between DBH and Height **(p<0.05)**. + +- We found a {*significant*} **positive** relationship between DBH and Height {*(p<0.05)*} **(b = 0.61, SE = 0.01)**. 
+ +- (add p-value if you wish) + + +### Models that describe themselves (easystats) + +```{webr-r} +report(m1) +``` + + + + + +### Generating table with model results: `modelsummary` + +```{webr-r} +library("modelsummary") +``` + +```{webr-r} +modelsummary(m1, output = "html") ## Word, PDF, PowerPoint, png... +``` + + + + +### Generating table with model results: `modelsummary` + +```{webr-r} +modelsummary(m1, fmt = 2, + estimate = "{estimate} ({std.error})", + statistic = NULL, + gof_map = c("nobs", "r.squared", "rmse"), + output = "html") +``` + + + + +## Visualising fitted model + +### Plot model: `visreg` + +```{webr-r} +library("visreg") +``` + +```{webr-r} +visreg(m1) +``` + + +`visreg` can use ggplot2 too + +```{r} +#| eval: false +visreg(m1, gg = TRUE) + theme_bw() +``` + + + + +### Plot (easystats) + +```{webr-r} +plot(estimate_expectation(m1)) +``` + + +### Plot (modelsummary) + +```{webr-r} +modelplot(m1) +``` + + +### Plot model parameters with easystats (`see` package) + +```{webr-r} +plot(parameters(m1), show_intercept = TRUE, show_labels = TRUE) +``` + + +### Plot parameters' estimated distribution + +```{webr-r} +plot(simulate_parameters(m1)) +``` + + + +## Model checking + + +### Linear model assumptions + +- **Linearity** (transformations, GAM...) + +- **Residuals**: + - Independent + - Equal variance + - Normal + +- Negligible **measurement error** in predictors + + + + +### Are residuals normal? 
+ +```{webr-r} +hist(residuals(m1)) +``` + +SD = 4.09 + + +### Model checking: `plot(model)` + +```{webr-r} +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow=2)) +plot(m1) +par(def.par) +``` + + +### Model checking with `performance` (easystats) + +```{webr-r} +check_model(m1) +``` + + + + + +### A dashboard to explore the full model + +```{r} +#| eval: false +model_dashboard(m1) +``` + + + + +## Making predictions with easystats + +### Estimate expected values + +```{webr-r} +pred <- estimate_expectation(m1) +head(pred) +``` + + +### Expected values given DBH + +```{webr-r} +plot(estimate_expectation(m1)) +``` + + + +### Calibration plot: observed vs predicted + +```{webr-r} +pred$height.obs <- trees$height +plot(height.obs ~ Predicted, data = pred, xlim = c(15, 60), ylim = c(15, 60)) +abline(a = 0, b = 1) +``` + + +### Estimate prediction interval + +Accounting for residual variation! + +```{webr-r} +pred <- estimate_prediction(m1) +head(pred) +``` + +### Confidence vs Prediction interval + +```{webr-r} +plot(estimate_expectation(m1)) +``` + +```{webr-r} +plot(estimate_prediction(m1)) +``` + + +### Make predictions for new data + +```{webr-r} +estimate_expectation(m1, data = data.frame(dbh = 39)) +``` + +```{webr-r} +estimate_prediction(m1, data = data.frame(dbh = 39)) ``` + + + + +## Workflow + +- **Visualise data** + +- **Understand fitted model** (`summary`) + +- **Visualise model** (`visreg`...) + +- **Check model** (`plot`, `check_model`, calibration plot...) + +- **Predict** (`predict`, `estimate_expectation`, `estimate_prediction`) + + + + + + +# Categorical predictors (factors) + + +## Q: Does tree height vary with sex? 
+ +```{webr-r} +boxplot(height ~ sex, data = trees) +``` + + + +### Model height ~ sex + +```{webr-r} +m2 <- lm(height ~ sex, data = trees) +summary(m2) +``` + + + +### Linear model with categorical predictors + +```{webr-r} +m2 <- lm(height ~ sex, data = trees) +``` + +corresponds to + +$$ + \begin{aligned} + Height_{i} = a + b_{male} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +$$ + + + +### Model height ~ sex + +```{webr-r} +m2 <- lm(height ~ sex, data = trees) +summary(m2) +``` + + +### Quiz + +https://pollev.com/franciscorod726 + + +### Let's read the model report... + +```{webr-r} +report(m2) +``` + + + +### Estimated distribution of the **intercept** parameter + +**Intercept = Height of females** + +```{webr-r} +parameters(m2)[1,] +``` + +```{webr-r} +plot(simulate_parameters(m2), show_intercept = TRUE) +``` + + +### Estimated distribution of the *beta* parameter + +*beta* = **height difference** of males vs females + +```{webr-r} +parameters(m2)[2,] +``` + +```{webr-r} +plot(simulate_parameters(m2)) +``` + + + + + +### Analysing differences among factor levels + +```{webr-r} +estimate_means(m2) +``` + +```{webr-r} +estimate_contrasts(m2) +``` + + +### Visualising the fitted model + +### Plot (visreg) + +```{webr-r} +visreg(m2) +``` + + +### Plot (easystats) + +```{webr-r} +plot(estimate_means(m2)) +``` + + + +### Model checking + +### Model checking: residuals + +```{webr-r} +hist(resid(m2)) +``` + +```{webr-r} +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow=2)) +plot(m2) +par(def.par) +``` + + +### Model checking (easystats) + +```{webr-r} +check_model(m2) +``` + + + + +## Q: Does height differ among field sites? 
+ +### Quiz + +https://pollev.com/franciscorod726 + +### Plot data first + +```{webr-r} +plot(height ~ site, data = trees) +``` + + +### Linear model with categorical predictors + +```{webr-r} +m3 <- lm(height ~ site, data = trees) +``` + + +$$ + \begin{aligned} + y_{i} = a + b_{site2} + c_{site3} + d_{site4} + e_{site5} +...+ \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +$$ + + + + + +### Model Height ~ site + +**All right here?** + +```{webr-r} +m3 <- lm(height ~ site, data = trees) +summary(m3) +``` + + +### site is a factor! + +```{webr-r} +trees$site <- as.factor(trees$site) +``` + + +### Model Height ~ site + +```{webr-r} +m3 <- lm(height ~ site, data = trees) +summary(m3) +``` + + +### Estimated parameter distributions + +```{webr-r} +plot(simulate_parameters(m3)) +``` + + + + +### Estimated tree heights for each site + +```{webr-r} +estimate_means(m3) +``` + + +### Plot estimated tree heights for each site + +```{webr-r} +plot(estimate_means(m3)) +``` + +### Analysing differences among factor levels + +For finer control see `emmeans` package + +```{webr-r} +estimate_contrasts(m3) +``` + + +### Analysing differences among factor levels + +How different are site 2 and site 9? 
+ +```{webr-r} +library("marginaleffects") +``` + +```{webr-r} +hypotheses(m3, "site2 = site9") +``` + + + +### Presenting model results + +```{webr-r} +parameters(m3) +``` + + +```{webr-r} +modelsummary(m3, estimate = "{estimate} ({std.error})", statistic = NULL, + fmt = 1, gof_map = NA, coef_rename = paste0("site", 1:10), output = "html") +``` + + + +### Plot (visreg) + +```{webr-r} +visreg(m3) +``` + +### Plot (easystats) + +```{webr-r} +plot(estimate_means(m3)) +``` + + + +### Plot model (modelsummary) + +```{webr-r} +modelplot(m3) +``` + + +### Plot model (easystats) + +```{webr-r} +plot(parameters(m3), show_intercept = TRUE) +``` + + +### Fit model without intercept + +```{webr-r} +m3bis <- lm(height ~ site - 1, data = trees) +summary(m3bis) +``` + +```{webr-r} +plot(parameters(m3bis)) +``` + +### Model checking: residuals + +```{webr-r} +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow = 2)) +plot(m3) +par(def.par) +``` + + +### Model checking: residuals + +```{webr-r} +check_model(m3) +``` + + + + + +# Combining continuous and categorical predictors + + +### Predicting tree height based on dbh and site + +```{webr-r} +lm(height ~ site + dbh, data = trees) +``` + +corresponds to + +$$ + \begin{aligned} + y_{i} = a + b_{site2} + c_{site3} + d_{site4} + e_{site5} +...+ k \cdot DBH_{i} + \varepsilon _{i} \\ + \varepsilon _{i}\sim N\left( 0,\sigma^2 \right) \\ + \end{aligned} +$$ + + +### Predicting tree height based on dbh and site + +```{webr-r} +m4 <- lm(height ~ site + dbh, data = trees) +summary(m4) +``` + + +### Presenting model results + +```{webr-r} +parameters(m4) +``` + + + +### Estimated tree heights for each site + +```{webr-r} +estimate_means(m4) +``` + + +### Fit model without intercept + +```{webr-r} +m4 <- lm(height ~ -1 + site + dbh, data = trees) +summary(m4) +``` + + +### Plot (visreg) + +```{webr-r} +visreg(m4) +``` + +```{webr-r} +visreg(m4, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE) +``` + + + +### Plot model 
(easystats) + +```{webr-r} +plot(parameters(m4)) +``` + +Keeping sites only, dropping "dbh" + +```{webr-r} +plot(parameters(m4, drop = "dbh")) +``` + + +### Plot model (modelsummary) + +```{webr-r} +modelplot(m4) +``` + +Keeping sites only, dropping "dbh" + +```{webr-r} +modelplot(m4, coef_omit = "dbh") +``` + + +### What happened to site 8? + +```{webr-r} +visreg(m3) +``` + +```{webr-r} +visreg(m4, xvar = "site") +``` + +site 8 has the largest diameters: + +```{webr-r} +boxplot(dbh ~ site, data = trees) +``` + +**DBH** + +```{webr-r} +aggregate(trees$dbh ~ trees$site, FUN = mean) +``` + +**HEIGHT** + +```{webr-r} +aggregate(trees$height ~ trees$site, FUN = mean) +``` + + + + +### We have fitted model w/ many intercepts and single slope + +```{webr-r} +visreg(m4, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE) +``` + + + + +### Slope is the same for all sites + +```{webr-r} +parameters(m4, keep = "dbh") +``` + + + +### Model checking: residuals + +```{webr-r} +def.par <- par(no.readonly = TRUE) +layout(matrix(1:4, nrow=2)) +plot(m4) +par(def.par) +``` + + +### Model checking with easystats + +```{webr-r} +check_model(m4) +``` + + + +### How good is this model? Calibration plot + +```{webr-r} +trees$height.pred <- fitted(m4) +plot(trees$height.pred, trees$height, + xlab = "Tree height (predicted)", + ylab = "Tree height (observed)", + las = 1, xlim = c(10,60), ylim = c(10,60)) +abline(a = 0, b = 1) +``` + + +### How good is this model? 
Calibration plot (easystats) + +```{webr-r} +pred <- estimate_expectation(m4) +pred$obs <- trees$height +plot(obs ~ Predicted, data = pred, xlim = c(15, 60), ylim = c(15, 60)) +abline(a = 0, b = 1) +``` + + +### *Posterior* predictive checking + +Simulating response data from the fitted model (`yrep`) + +and comparing with the observed response (`y`) + +```{webr-r} +check_predictions(m4) +``` + + + + +## Predicting heights of new trees (easystats) + +### Using model for prediction + +Expected height of a 10-cm diameter tree at each site? + +```{webr-r} +trees.10cm <- data.frame(site = as.factor(1:10), + dbh = 10) +trees.10cm +``` + + +### Using model for prediction + +Expected height of 10-cm DBH trees at each site + +```{webr-r} +pred <- estimate_expectation(m4, data = trees.10cm) +pred +``` + +### Using model for prediction + +Prediction intervals (accounting for residual variance) + +```{webr-r} +pred <- estimate_prediction(m4, data = trees.10cm) +pred +``` + + + +# Q: Does the allometric relationship between Height and Diameter vary among sites? + +```{webr-r} +df <- data.frame(dbh = seq(10, 50, by = 1), + height = seq(20, 60, by = 1)) + +plot(height ~ dbh, data = df, type = "n") +abline(a = 25, 0.6) +abline(a = 40, b = 0.1, col = "steelblue") +abline(a = 50, b = -0.3, col = "orangered") +``` + + + +### Model with interactions + +```{webr-r} +m5 <- lm(height ~ site*dbh, data = trees) +summary(m5) +``` + + + +### Does slope vary among sites? 
+ +```{webr-r} +visreg(m5, xvar = "dbh", by = "site") +``` + +```{webr-r} +visreg(m5, xvar = "dbh", by = "site", overlay = TRUE, band = FALSE) +``` + + +END + diff --git a/docs/trees.csv b/docs/trees.csv new file mode 100644 index 0000000..d498bfe --- /dev/null +++ b/docs/trees.csv @@ -0,0 +1,1001 @@ +"site","dbh","height","sex","dead" +4,29.68,36.1,"male",0 +5,33.29,42.3,"male",0 +2,28.03,41.9,"female",0 +5,39.86,46.5,"female",0 +1,47.94,43.9,"female",0 +1,10.82,26.2,"male",0 +2,10.6,29.8,"male",0 +2,20.12,35.6,"male",0 +2,29.14,42.1,"male",0 +1,29.55,36.5,"male",0 +1,36.44,40.7,"male",0 +1,15.64,34.4,"female",0 +4,39.23,44.1,"male",0 +1,20.05,30.8,"female",0 +1,11.58,24.8,"female",0 +1,21.05,32.2,"male",0 +4,15.7,34.3,"female",0 +5,24.57,39.3,"female",0 +1,18.21,26.3,"female",0 +1,34.69,39.4,"female",0 +1,43.49,40.2,"male",0 +5,18.43,33.7,"male",0 +4,30.83,39.6,"male",0 +10,29.15,37.5,"male",0 +7,8.57,24.3,"male",0 +2,34.88,46.9,"male",0 +1,30.59,33.5,"female",0 +1,36.93,36.9,"female",0 +1,29.03,35.5,"male",0 +2,12.83,30.6,"male",0 +5,41.89,45.3,"female",0 +2,32.04,44.1,"female",0 +1,21.76,32,"female",0 +1,46.84,41.9,"female",1 +1,32.62,36.5,"female",0 +1,18.69,32,"male",0 +1,43.53,46.2,"male",0 +3,5.14,20.2,"female",0 +7,26.4,33.3,"male",0 +1,48.04,46.8,"male",0 +2,16.26,37.6,"female",0 +4,31.84,35.9,"male",0 +1,42.57,41.2,"male",0 +6,15.37,30.1,"male",0 +1,16.38,21.4,"female",0 +1,28.89,36.8,"male",1 +1,12.78,26.6,"female",0 +2,15.66,34.4,"female",0 +5,31.76,39.3,"male",0 +3,43.51,51.9,"male",0 +2,31.04,45.8,"female",0 +1,9.9,15.9,"male",0 +3,49.92,53.9,"female",0 +1,34.56,34.1,"male",0 +1,6.02,21.1,"male",0 +2,38.75,42.9,"female",0 +4,12.56,29,"female",0 +1,16.98,32,"female",0 +5,18.51,32.8,"female",0 +1,25.17,33.9,"female",0 +8,17.89,25.3,"male",0 +6,43.32,50,"female",1 +3,30.15,36.2,"female",0 +1,47.08,46.8,"male",0 +2,17.5,34.9,"female",0 +3,23.02,34.5,"male",0 +1,14.38,25.6,"female",0 +3,40.28,43.6,"female",1 +6,30.64,40.6,"male",0 +9,18.05,35.1,"male",0 
+1,31.02,34.9,"male",1 +2,23.9,46.4,"male",0 +3,25.99,35.5,"female",0 +4,44.64,44,"male",1 +4,39.25,47.1,"male",0 +2,45.85,49.3,"male",1 +6,9.61,26.2,"male",0 +5,20.68,28.5,"male",0 +1,22.03,30.8,"male",0 +1,31.83,36.7,"male",0 +6,21.69,38.6,"male",0 +1,39.84,37.8,"male",0 +1,29.58,27.3,"male",0 +1,21.5,29.4,"male",0 +2,6.93,26.8,"male",0 +1,29.91,37.6,"male",0 +6,38.75,44.3,"male",0 +4,9.25,21.9,"male",0 +1,28.63,33,"male",0 +2,24.62,36.6,"male",0 +2,48.61,55,"female",0 +3,33.17,39.9,"female",0 +2,47.94,56.8,"male",0 +2,10.74,27.8,"female",0 +1,47.28,42.2,"female",0 +1,7.55,21.9,"male",0 +2,15.01,31.8,"female",0 +8,29.31,34.6,"female",0 +3,19.01,32.4,"male",0 +1,37.4,36.6,"female",0 +6,33.32,42.5,"male",0 +1,13.85,22.8,"male",0 +5,28.83,35.4,"female",0 +2,9.9,23.1,"male",0 +2,47.41,51.1,"male",0 +1,15.81,29.9,"male",0 +1,23.6,29.9,"female",0 +4,13.01,27.1,"female",0 +5,10.44,29.4,"male",0 +2,29,38.4,"female",0 +9,29.23,34.1,"female",0 +3,7.06,27.5,"female",0 +5,36.91,45.8,"male",0 +1,29.78,34.5,"female",0 +5,14.2,28.9,"female",0 +2,46.16,51.5,"male",0 +2,29.48,41.4,"female",0 +4,34.57,37.4,"female",0 +2,35.22,43.5,"female",0 +10,20.78,32.2,"male",0 +6,42.99,50.8,"male",1 +1,14.09,22.3,"female",0 +1,34.26,42.6,"male",0 +3,21.32,34,"female",0 +2,42.41,49.7,"female",0 +2,21.71,36.9,"male",0 +1,32.8,34.1,"male",0 +3,27.11,36.2,"female",0 +1,43.8,44.3,"female",1 +4,12.51,27.8,"female",0 +6,25.61,37.6,"male",0 +1,30.81,36.7,"male",0 +2,20.93,31.2,"male",0 +1,26.55,30.9,"male",0 +3,45.43,49.4,"female",0 +4,28.63,37.8,"male",0 +1,46.26,47.8,"male",1 +1,10.92,23.2,"female",0 +2,35.93,45.5,"female",1 +5,16.25,34.3,"female",0 +2,5.66,33.4,"male",0 +6,34.19,43.5,"female",0 +1,7.88,13.4,"male",0 +3,27.31,42.9,"female",0 +5,10.73,29.2,"male",0 +1,23.5,29.4,"female",0 +3,20.73,38.8,"male",0 +5,36.94,42.9,"female",0 +1,33.62,41.7,"female",0 +1,13.12,23.4,"female",0 +2,22.41,39.6,"male",0 +3,35.69,43.1,"female",0 +2,20.6,36.9,"female",0 +1,12.28,22.2,"male",0 
+3,39.77,42.6,"female",0 +1,5.92,22.9,"male",0 +4,5.06,22.4,"female",0 +2,9.23,35.1,"female",0 +1,22.25,31.2,"female",0 +2,34.24,42.9,"female",0 +1,19.06,29.2,"male",0 +1,11.54,17.7,"male",0 +2,8.75,27.5,"female",0 +1,18.41,29.6,"male",0 +1,46.63,45.9,"female",0 +1,11.99,24.6,"female",0 +6,46.32,52.3,"female",0 +5,32.63,40.8,"female",0 +5,46.55,50.5,"male",0 +3,42.15,44.4,"male",0 +6,22.08,35.7,"female",0 +2,14.4,34.5,"female",0 +1,11.93,25,"male",0 +1,17.75,26.1,"male",0 +6,31.64,42,"male",0 +1,13.02,28.4,"female",0 +1,33.84,37.7,"male",0 +2,24.66,36.5,"female",0 +7,35.7,40.7,"male",0 +1,23.83,33.1,"male",0 +1,34.61,39.5,"male",0 +2,16.92,33.1,"female",0 +1,44.52,42.8,"male",0 +1,14.01,26.3,"male",0 +2,42.01,45.7,"female",0 +1,43.35,42,"female",0 +7,41.21,41,"male",0 +2,12.76,31.9,"female",0 +1,17.26,26.1,"female",0 +1,39.49,42.2,"female",0 +4,42.5,50.4,"male",0 +1,42.37,39.9,"female",0 +1,44.23,48.8,"male",0 +1,28.32,29.7,"female",0 +1,49.48,46.4,"female",0 +4,49.81,45.8,"female",0 +1,9.42,21.3,"male",0 +5,9.56,28,"male",0 +6,38.22,39.8,"male",0 +3,43.24,44.9,"male",0 +1,28.65,33.8,"female",0 +4,20.74,31,"female",0 +1,38.11,41.7,"female",0 +1,8.63,24.9,"female",0 +1,26.21,32.6,"male",0 +3,37.59,41.3,"female",0 +3,36.03,44,"female",0 +2,31.97,43.1,"male",0 +1,44.07,46.3,"male",0 +5,46.24,48,"male",1 +1,48.62,46.1,"female",0 +4,48.78,43.4,"female",0 +5,7.43,28,"male",0 +5,31.9,42.1,"male",0 +1,48.71,50.2,"male",0 +4,22.31,25.5,"male",0 +2,34.75,46.8,"female",0 +1,19.95,28.3,"male",0 +2,37.98,46.8,"male",0 +2,23.59,40.3,"female",0 +1,45.6,48.1,"male",0 +2,24.64,37,"female",0 +5,49.69,49.5,"male",0 +1,14.92,28,"female",0 +1,49.09,49.4,"male",0 +1,31.24,38.6,"female",0 +4,16.53,27,"female",0 +2,30.83,39.7,"male",0 +3,31.29,38.7,"male",0 +1,24.21,28.3,"male",0 +1,20.49,27.1,"male",0 +1,39.69,41.4,"male",0 +2,16.98,34.2,"female",0 +1,6.8,21.9,"female",0 +6,40.64,47.5,"female",0 +4,15.02,28,"male",0 +4,11.29,24.7,"male",0 +4,25.25,34.2,"male",0 +2,36.43,48.2,"female",0 
+5,18.11,33.8,"male",0 +1,41.37,43,"female",0 +1,22.3,29.7,"female",0 +1,34.43,37.5,"male",0 +1,48.74,53.5,"female",0 +1,48.55,48.7,"male",0 +1,28.06,28.3,"male",0 +5,33.34,40,"female",0 +1,45.66,41.6,"male",0 +5,28.35,42.5,"male",0 +5,41.54,48,"female",0 +1,20.39,24.8,"male",0 +5,25.08,29.3,"female",0 +1,26.13,28.3,"female",0 +6,35.75,43.3,"male",0 +2,42.57,41.7,"male",0 +1,47.39,46.9,"male",0 +1,40.44,45.4,"female",0 +1,9.59,19.7,"female",0 +2,40.66,42.8,"male",0 +2,7.98,30.5,"female",0 +2,8.16,33.3,"male",0 +3,8.5,28.6,"male",0 +2,9.19,30.2,"female",0 +2,47.59,55.2,"female",0 +1,34.49,33.5,"female",0 +3,40.43,40.8,"male",0 +2,24.77,37.4,"female",0 +1,18.6,29.6,"female",0 +3,42.48,44.9,"female",0 +8,28.79,27.6,"female",0 +1,45.29,42.2,"female",0 +2,46.82,50.2,"male",0 +1,27.37,31.6,"female",0 +1,8.89,19.2,"female",0 +1,41.49,45.6,"female",0 +2,16.72,29.4,"female",0 +1,34.99,37.3,"female",0 +1,34.42,37.4,"male",0 +3,20.69,31.5,"female",0 +1,11.62,23.2,"male",0 +2,20.4,36,"female",0 +2,13.11,30.6,"female",0 +5,29.28,39.3,"female",1 +5,41.99,48.2,"female",0 +4,42.52,43.6,"male",0 +1,43.86,45.5,"male",0 +1,39.63,38.5,"female",0 +5,34.83,38.4,"female",0 +4,11.54,25.2,"female",0 +4,45.17,44.8,"female",0 +1,46.1,44.2,"female",0 +1,31.74,41.3,"female",0 +5,48.62,50.3,"female",0 +4,10.66,22.8,"male",0 +1,41.62,40.4,"female",0 +1,34.52,39.1,"male",0 +1,18.45,26.7,"male",0 +4,18.81,31,"female",0 +1,34.99,37.7,"female",0 +7,30.92,36.4,"male",0 +3,30.61,37.7,"female",0 +1,5.84,16.3,"female",0 +2,5.21,33.3,"male",0 +4,34.72,42.7,"female",1 +6,14.18,29.3,"male",0 +2,21.49,36.4,"female",0 +2,37.79,48.5,"male",0 +1,20.94,28.2,"female",0 +5,34.32,37.2,"male",0 +2,6.38,27,"female",0 +2,30.89,44.4,"male",0 +3,34.51,44.6,"male",0 +1,42.78,41.2,"male",0 +2,25.81,44.2,"female",0 +6,35.55,43.8,"male",0 +1,16.67,31.5,"female",0 +1,36.74,40.7,"female",0 +1,14,26.6,"male",0 +4,40.5,39.6,"female",0 +2,44.6,49.7,"female",0 +1,33.64,37.2,"female",0 +1,42.17,41.2,"female",0 
+3,7.88,27.3,"female",0 +3,41.16,46.5,"male",0 +5,44.61,50.1,"female",0 +1,32.44,43.1,"male",0 +1,34.52,37,"female",0 +2,21.51,33.1,"female",0 +4,33.24,44.4,"male",0 +3,26.77,37.9,"female",0 +1,46.25,47.7,"female",0 +1,18.22,24.5,"male",0 +4,43.78,45.3,"male",0 +1,31.32,39.4,"male",0 +2,40.99,48.9,"male",0 +2,29.09,44.9,"female",0 +1,30.49,39.6,"male",0 +1,22.03,33.5,"male",0 +5,41.55,47.5,"female",0 +1,19.93,30.6,"male",0 +5,36.91,46.8,"female",0 +3,10.21,24.4,"female",0 +5,16.42,30.2,"male",0 +1,36.88,38.8,"female",0 +4,14.24,27.9,"male",0 +2,20,32.9,"male",0 +1,23.86,30.6,"male",0 +5,41.66,44.2,"male",0 +2,30.08,43.8,"female",0 +5,28.95,42.3,"male",0 +1,32.4,38.4,"female",0 +1,6.13,19.6,"male",0 +6,10.68,26.4,"male",0 +5,21.06,33.9,"male",0 +2,31.62,39.9,"female",0 +2,40.81,49.8,"female",0 +1,7.15,20.9,"male",1 +2,47.03,54,"male",0 +1,33.98,35.4,"female",0 +1,39.53,34.3,"female",0 +2,26.46,38.8,"male",0 +1,40.15,39.9,"female",0 +2,39.38,54.6,"female",0 +2,24.91,37.1,"male",0 +1,10.26,23.4,"female",1 +3,32.39,38.1,"male",0 +1,40.95,45.1,"female",0 +1,36.98,38,"male",0 +5,23.46,24.7,"female",0 +3,28.14,41.5,"female",0 +1,27.63,33.7,"male",0 +2,39.33,50.7,"male",0 +4,18.85,23.1,"female",0 +1,44.97,44.1,"female",0 +1,33.64,39.8,"male",0 +1,12.35,26.4,"male",0 +2,18.56,35,"female",0 +3,27.31,39.4,"male",0 +4,6.34,24,"male",0 +2,27.83,44.6,"female",0 +1,21.42,33.2,"male",0 +3,25.06,30.3,"male",0 +1,20.49,32.5,"female",0 +9,41.42,51.5,"female",0 +4,20.75,33.6,"male",0 +6,45.73,52.3,"male",0 +1,11.31,25.8,"male",0 +3,12.73,32.2,"female",0 +7,42.52,45.8,"female",0 +3,48.89,53.7,"female",0 +5,8.95,25.4,"male",0 +1,19.82,29.3,"male",1 +1,15.51,21.6,"male",0 +1,16.57,26.9,"male",0 +1,46.26,46.3,"female",0 +5,18.86,32.9,"female",0 +2,41.82,48.8,"female",0 +2,32.78,44.8,"male",0 +3,25.12,35.7,"female",0 +1,8.37,17.2,"male",0 +7,25.59,36.6,"female",0 +2,37.44,45.3,"female",0 +2,36.59,50.4,"male",0 +1,37.58,37.4,"male",0 +1,23.14,31.8,"male",0 +9,42.84,48.6,"female",0 
+8,43.4,40.8,"female",0 +2,8.39,29.2,"female",0 +2,27.02,34.9,"male",0 +6,5.7,28.2,"female",0 +1,20.87,25.9,"male",0 +4,44.99,46.9,"male",0 +3,33.33,40.3,"male",0 +1,35,37.5,"male",0 +1,28.61,32.5,"male",0 +1,27.64,36.9,"male",0 +3,35.84,40.8,"female",1 +3,20.86,35.9,"female",0 +3,48.57,52.4,"male",1 +1,41.66,43.2,"male",0 +2,37.62,40.8,"male",0 +5,41.24,43.7,"female",0 +1,9.55,22.8,"female",0 +1,13.84,25.3,"male",0 +1,44.82,45.9,"female",0 +5,13.42,26.5,"female",0 +7,7.62,19.2,"male",0 +6,5.71,21.5,"female",0 +1,23.03,33.5,"female",0 +5,13.98,32,"female",0 +4,12.22,25.3,"female",0 +1,40.82,38.7,"male",0 +2,21.55,38.4,"female",0 +2,31.29,43.5,"female",0 +5,43.69,42.7,"male",0 +4,22.7,33.8,"female",0 +1,23.34,30.2,"female",0 +1,7.43,23.6,"female",0 +1,37.41,36.2,"male",0 +2,21.9,35.4,"female",0 +3,40.79,49.7,"female",0 +2,45.24,51.9,"male",0 +5,36.44,44,"female",0 +1,25.97,31.5,"female",0 +1,47.48,49.4,"female",0 +1,23.6,25.4,"male",0 +3,8.23,27.6,"male",0 +1,19.09,28.4,"female",0 +2,16.31,32.2,"male",0 +6,48.84,52,"female",0 +2,7.68,30.4,"male",0 +5,42.32,47.2,"male",0 +1,45.9,46.2,"female",0 +1,15.32,27.6,"female",0 +1,5.96,18.4,"male",0 +4,48.21,49.2,"male",0 +1,6.72,19.6,"female",0 +2,18.7,32.3,"male",0 +5,45.69,51,"female",0 +1,34.12,35.7,"male",0 +5,34.19,41,"female",0 +3,29.01,46.3,"male",0 +1,31.23,36.7,"male",0 +3,41.53,47.5,"male",0 +3,45.05,51.5,"female",0 +4,49.92,47.4,"female",0 +1,10.39,26,"female",0 +6,10.84,26.4,"female",0 +1,34.34,35.3,"male",1 +1,38.21,40,"male",0 +4,26.51,37.8,"female",0 +4,28.34,31.5,"female",0 +5,19.73,34.2,"female",0 +1,13.05,20.8,"female",0 +1,6.19,19.1,"male",0 +2,25.18,35.5,"male",0 +1,22.92,31.6,"female",0 +6,42.06,46.5,"female",0 +2,22.11,41.3,"male",0 +9,33.52,43.5,"female",0 +1,19.92,30,"male",0 +3,32.6,38.1,"male",0 +1,44.02,41.7,"male",0 +1,14.85,33.3,"male",0 +2,31.1,41.8,"female",0 +2,33.91,44.9,"female",0 +1,21.6,30.2,"female",0 +1,26.69,32.8,"male",0 +5,47.88,51.6,"female",0 +1,44.54,46.9,"male",0 
+6,23.58,31.6,"male",0 +2,29.59,35.2,"female",0 +1,28.46,29.2,"female",0 +5,44.35,48,"male",0 +1,14.5,24.1,"female",0 +4,19.63,27.5,"female",0 +2,38.03,43.8,"female",0 +1,21.1,34.1,"male",0 +1,42.05,45.6,"male",0 +6,7.52,28.4,"male",0 +5,27.82,38.7,"female",0 +1,47.41,44.2,"male",0 +7,42.2,46.4,"female",0 +1,13.45,31.1,"female",0 +1,30.35,35.9,"female",0 +1,37.22,30.3,"female",0 +1,6.49,20,"female",0 +1,17.19,22.6,"male",0 +2,24.67,36.8,"male",0 +4,48.97,46.3,"male",0 +1,14.31,26.6,"male",0 +6,29.68,34.1,"male",0 +2,49.42,57.2,"male",0 +2,21.23,32.9,"female",0 +2,40.84,50.7,"female",0 +2,36.91,40.2,"female",0 +4,32.33,37.1,"male",0 +3,21.89,35.3,"male",0 +2,48.68,59.3,"female",0 +1,49.88,50.2,"male",0 +4,26.37,26.8,"male",0 +2,14.12,36.9,"male",0 +1,40.06,39.4,"female",0 +4,21.16,35.2,"male",0 +2,39.96,45,"male",0 +3,9.52,25.8,"female",0 +1,41.53,46.5,"male",0 +2,36.2,41.7,"male",0 +5,19.73,35.4,"male",0 +3,22.31,42.2,"male",0 +2,29.25,41.7,"male",0 +6,28.19,33.9,"female",0 +2,48.41,55.7,"male",0 +1,42.51,45.7,"male",0 +2,33.92,39.8,"female",0 +1,30.73,35.2,"male",0 +1,41.78,40.5,"male",1 +5,11.12,29.1,"male",0 +4,25.63,43.8,"female",0 +1,11.25,17.5,"female",0 +6,11.39,29.1,"male",0 +7,28.75,33.3,"male",1 +2,11.53,35.7,"male",0 +6,40.47,46.5,"female",0 +1,32.52,39.9,"female",0 +5,28.87,35.1,"female",0 +1,36.01,35.5,"female",0 +1,42.32,43.4,"female",1 +2,13.82,30.6,"female",0 +1,31.43,42.9,"male",0 +4,23.06,36.9,"female",0 +1,7.24,20.9,"male",0 +1,29.03,37.7,"male",0 +2,38.05,49.5,"female",0 +1,9.06,29.3,"male",0 +2,20.55,33.2,"female",0 +2,19.99,34.1,"male",0 +4,11.17,22.5,"male",0 +2,8.64,27.4,"female",0 +6,22.77,27.8,"female",0 +1,38.98,35.9,"female",0 +1,49.55,49.6,"female",0 +6,18.33,33,"female",0 +1,12.88,27.2,"male",0 +1,11.6,17.2,"male",0 +7,26.95,30.6,"male",0 +2,25.87,35,"female",1 +1,40.45,39.4,"female",0 +2,40.7,47.6,"female",0 +4,29.6,37.7,"female",0 +2,26.25,38.9,"male",0 +1,34.3,32,"female",0 +1,41.86,41.2,"female",0 +5,25.82,37.1,"female",0 
+4,30.9,39.6,"female",0 +1,28.79,35,"male",0 +1,36.62,42.4,"male",0 +5,41.06,46.5,"female",0 +1,27.04,34,"female",0 +2,7.32,27.3,"female",0 +6,26.13,41,"male",1 +1,45.12,49.1,"male",0 +1,21.23,30.3,"male",0 +2,27.98,42.6,"male",0 +1,38.59,43.3,"female",0 +1,14.43,20.9,"male",0 +1,20.33,27.9,"male",1 +1,44.32,50,"female",0 +1,26.1,35.8,"male",0 +2,19.59,39.5,"male",0 +1,37.48,43.7,"female",0 +6,36.05,42,"male",0 +2,40.44,55,"female",0 +3,21.09,30.9,"male",0 +2,19.53,35,"male",0 +2,18.91,37.5,"female",0 +5,46.84,45.1,"female",0 +7,48.27,46.9,"female",0 +1,47.1,53.6,"female",0 +3,12.43,27.4,"male",0 +1,12.31,23.2,"female",0 +1,49.24,48.5,"female",0 +1,27.73,32.5,"male",0 +1,29.58,33,"female",0 +5,34.26,42.2,"female",0 +2,44.93,48.9,"male",0 +6,26.97,33,"male",0 +5,37.88,48.7,"female",0 +1,46.82,47.3,"male",0 +1,16.08,25.5,"male",0 +3,10.15,25.1,"female",0 +5,19.03,36.6,"female",0 +1,34.9,42.3,"male",0 +3,5.24,19.1,"female",0 +5,24.36,42.3,"male",0 +2,41.59,48.8,"female",0 +2,18.07,34.2,"male",0 +1,26.92,31.2,"female",0 +1,5.28,24,"female",0 +2,46.91,51.2,"female",0 +1,19.05,26.3,"female",0 +1,20.82,27.5,"female",0 +5,32.53,41,"male",0 +3,45.27,52.4,"male",0 +2,24.87,38.6,"male",0 +1,13.87,27.2,"male",0 +1,18.32,26.6,"female",0 +2,46.18,50.7,"female",0 +2,30.39,42.8,"male",0 +4,27.87,32.7,"male",0 +1,20.98,33.6,"male",0 +2,7.11,28.2,"male",0 +5,36.97,42.4,"male",0 +6,33.5,40.1,"male",0 +5,14.37,28.7,"female",0 +1,45.99,39.2,"female",0 +1,47.83,53.4,"male",0 +6,18.15,28.6,"female",0 +5,22.43,35.6,"female",0 +4,14.41,25.4,"male",0 +6,16.76,34.9,"female",0 +5,26.06,35.6,"female",0 +3,17.48,34.2,"female",0 +2,39.01,44.7,"female",0 +3,45.41,47.8,"female",0 +1,13.07,23.8,"male",0 +2,5.31,31,"male",0 +10,43.06,43.1,"male",0 +1,38.17,40.6,"female",0 +5,49.83,49.2,"male",1 +2,43.15,46.6,"male",0 +2,5.91,26.6,"male",0 +2,34.5,38.2,"female",0 +1,41.51,43,"male",0 +5,15.52,29.7,"male",0 +6,23.91,33.2,"female",0 +6,20.92,31.7,"male",0 +7,39.08,38.6,"female",0 
+9,33.99,41.5,"female",0 +1,32.96,39.5,"male",0 +1,6.73,17.4,"male",0 +5,12.76,21.3,"female",0 +2,17.42,36.4,"female",0 +1,29.43,33.7,"male",1 +2,32.82,41.5,"female",0 +2,47.38,56,"male",0 +1,36.58,39,"male",0 +1,33.08,36.4,"female",0 +2,7.01,26.6,"male",0 +1,18.13,26.7,"male",0 +2,12.71,34.3,"male",0 +2,15.42,32.8,"female",0 +3,31.42,39.1,"male",0 +1,29.28,32,"male",0 +2,17.2,33.4,"female",0 +2,26.34,46,"male",0 +1,49.19,50.6,"male",0 +4,9.21,27.4,"male",0 +7,5.66,16,"male",0 +2,9,32.6,"male",0 +1,23.88,29.2,"male",0 +2,11.46,28.9,"male",0 +2,21.95,38.4,"female",0 +8,37.85,35,"female",0 +9,32.97,41.4,"male",0 +1,29.65,31.9,"male",0 +5,12.78,26.7,"male",0 +3,40.33,49.1,"male",0 +3,22.11,34.8,"female",0 +2,26.25,40.8,"male",1 +2,15.54,32.2,"female",0 +5,29.27,35,"female",0 +2,29.49,40.5,"female",0 +8,31.39,33,"male",0 +2,49.81,49.2,"female",0 +10,11.42,26.6,"male",0 +2,22.98,39.8,"male",0 +2,41.8,44.9,"female",0 +2,11.05,27,"male",0 +1,20.13,28,"male",0 +1,6.3,21.6,"male",0 +1,28.46,36.4,"male",0 +2,39.88,48,"male",1 +2,36.56,46.3,"male",0 +2,13.36,26.1,"male",0 +2,30.01,44.6,"female",0 +1,7.74,22.7,"male",0 +1,39.89,45,"male",0 +3,32.9,42.9,"female",1 +3,49.85,52.4,"female",0 +1,25.55,29,"female",0 +1,41.46,35.7,"female",0 +3,20.02,26.7,"male",0 +6,48.08,53.7,"male",0 +1,24.46,25.8,"male",0 +1,48.69,44.7,"female",0 +3,41.67,45.2,"female",0 +4,26.85,34.7,"male",1 +1,7.91,19.2,"male",0 +5,44.07,45.2,"male",0 +3,48.52,50.8,"male",0 +1,37.75,38.3,"female",0 +2,39.15,45.9,"female",0 +5,39.08,45.6,"female",0 +1,19.25,32.4,"male",0 +1,34.91,34.9,"female",0 +1,28.81,36.5,"female",0 +5,21.76,36.7,"male",0 +1,13.94,28.6,"male",0 +1,46.05,48,"female",0 +2,48.46,49.9,"female",0 +2,48.62,53.7,"female",0 +1,9.44,17.7,"male",0 +1,20.57,23.2,"female",0 +1,13.11,25.5,"male",0 +1,28.3,34,"female",0 +4,38.86,43.5,"male",0 +2,27.33,37.9,"female",0 +3,5.49,23.3,"male",0 +1,26.16,30.3,"female",0 +1,37.98,39.3,"female",0 +6,21.59,36.4,"male",0 +1,5.25,26.5,"male",0 +2,48.25,53.5,"male",0 
+1,18,33.3,"female",0 +1,22.08,28.7,"female",0 +1,47.37,45.3,"female",0 +2,13.64,37.1,"female",0 +2,31.15,42.3,"male",0 +1,48.14,45.7,"male",0 +2,27.38,43,"female",0 +1,15.31,23.3,"female",0 +1,5.45,19,"female",0 +7,6.13,17.4,"male",0 +1,6.09,22.3,"male",0 +1,46.75,50.9,"female",0 +1,8.19,20.2,"female",0 +1,8.55,23,"female",0 +3,7.74,30.9,"male",0 +1,46.74,45.2,"male",0 +2,8.18,26.5,"female",0 +2,41.18,49.9,"female",0 +1,5.62,23.3,"male",0 +5,15.38,27,"female",0 +5,25.23,36.6,"female",0 +1,48.52,44.4,"female",0 +1,46.8,45.6,"female",0 +2,17.18,32.1,"male",0 +1,36.82,32.4,"female",0 +6,33.74,43.4,"male",0 +6,39.97,56.9,"male",0 +1,20.15,26.1,"female",0 +2,14.97,34.2,"female",0 +5,43.07,50.1,"male",0 +1,6.89,22.9,"male",0 +5,10.32,25.3,"male",0 +5,20.46,32.9,"female",0 +5,15.12,23.4,"female",0 +4,47.11,48.8,"male",0 +3,28.3,38.3,"male",0 +5,33.05,41.4,"male",0 +2,13.4,35.9,"male",0 +8,31.09,26,"female",0 +4,5.59,23.9,"male",0 +4,14.14,23.5,"male",0 +1,31.75,36.9,"male",0 +1,45.15,45.6,"female",1 +1,8.54,17,"male",0 +1,5.18,22.3,"male",0 +3,33.84,45.3,"female",0 +2,44.46,49.5,"female",0 +1,26.53,32.2,"male",0 +1,15.82,26.6,"female",0 +1,39.01,39.9,"female",0 +3,48.33,48.7,"male",1 +2,8.47,29.8,"male",0 +7,32.04,38.8,"female",0 +1,29.37,38.7,"female",0 +1,23.03,23.3,"female",0 +1,49.38,47.7,"male",0 +2,23.45,33.9,"male",0 +1,40.05,42.6,"male",0 +3,46.74,50.1,"female",0 +2,48.51,55.1,"male",0 +1,45.41,42.7,"male",0 +3,12.32,32.5,"female",0 +1,28.97,33.3,"female",0 +1,7.35,22.3,"male",0 +5,20.12,30.2,"male",0 +2,12.49,31.9,"male",0 +2,27.56,38.5,"female",0 +4,7.97,25.1,"female",0 +7,38.04,42.6,"female",0 +1,34.32,39.1,"female",0 +1,47.67,47.5,"male",0 +1,21.71,27.9,"female",0 +4,13.4,28.7,"male",0 +2,19.28,31.9,"female",0 +1,12.69,26.1,"male",0 +2,44.52,47.7,"female",0 +1,18.53,24.9,"female",0 +4,32.79,31.1,"female",0 +2,23.93,35.3,"female",0 +5,47.51,56.3,"male",0 +1,25.68,37.5,"female",0 +7,19.68,27.5,"female",0 +8,38.01,31.3,"female",0 +5,31.13,39.7,"female",0 
+3,34.23,45,"female",0 +6,16.05,33.9,"female",0 +4,10.53,26.4,"female",0 +2,9.89,29.1,"male",0 +8,49.05,43.4,"female",0 +1,6.97,21.8,"female",0 +6,5.16,22.7,"female",0 +1,7.84,24.9,"male",0 +4,24.28,34.2,"male",1 +5,21.67,34.8,"female",0 +4,27.98,43.1,"male",0 +2,23.6,32.5,"male",0 +3,23.91,38.5,"female",0 +1,22.67,31.7,"male",0 +6,36.55,43.5,"female",0 +10,11.44,26.9,"male",0 +2,43.07,46,"male",0 +6,33.08,47.4,"male",0 +2,48.8,50.7,"female",0 +1,27.05,30.6,"male",0 +1,7.51,23.7,"female",0 +4,28.97,38.2,"female",0 +5,20.65,31.9,"male",0 +7,31.37,36.8,"female",0 +4,40.46,45.5,"female",0 +3,20.78,34.3,"male",0 +6,5.81,25,"female",0 +2,26,38.8,"female",0 +5,10.39,23.3,"male",0 +2,10.59,29.5,"female",0 +2,32.34,44.5,"female",0 +1,40.15,37,"female",0 +4,30.68,41.8,"male",1 +4,22.98,35.1,"male",0 +1,34.31,37.5,"male",0 +6,23.77,34,"female",0 +1,37.58,37.9,"male",0 +3,18.88,36.4,"male",0 +1,15.6,27,"male",0 +5,34.04,42.3,"female",0 +2,12.25,32.2,"male",0 +3,47.61,53.1,"female",0 +2,12.51,32.7,"female",0 +6,42.94,44.9,"male",0 +1,21.23,31.6,"male",0 +2,29.59,39.6,"male",0 +1,22.76,26.2,"female",0 +4,5.48,23.8,"male",0 +1,35.76,38.3,"male",0 +2,48.64,53.1,"male",0 +5,5.69,20.3,"male",0 +1,48.93,46.9,"male",0 +6,9.47,23.6,"female",0 +1,27.48,34.3,"male",0 +1,10.42,25.6,"male",0 +4,33.5,35.3,"female",0 +2,32.76,43,"female",0 +4,14.03,18.6,"female",0 +2,22.87,27.9,"female",0 +1,42.42,45.2,"female",0 +4,19.07,27.4,"male",0 +1,31.9,34.8,"male",0 +3,42.89,44.2,"male",0 +3,34.44,39.1,"male",0 +1,38.92,40.4,"female",0 +1,37.1,38.9,"female",0 +7,24.51,26.2,"male",0 +4,28.93,32.8,"male",0 +4,9.51,29.3,"female",0 +6,30.74,39.6,"female",0 +1,18.8,28.5,"male",0 +2,14.03,30.9,"male",0 +1,5.16,19.5,"male",0 +6,49.09,54,"male",0 +2,32.84,45.7,"female",0 +5,33.02,38.5,"male",0 +4,30.49,40.7,"female",0 +2,28.74,39.7,"female",0 +2,29.85,37.5,"male",0 +2,32.28,37.1,"female",0 +1,47.67,47.6,"male",0 +3,21.57,36.5,"female",0 +5,11.04,30.7,"female",0 +3,14.77,27.9,"female",0 
+6,45.86,46,"female",0 +4,22.42,31.7,"female",0 +2,33.74,48,"male",0 +1,24.76,34.7,"male",0 +3,18.65,30.2,"female",0 +1,18.16,24.4,"female",0 +8,40.57,34.9,"male",0 +1,21.1,31.4,"male",0 +1,5.27,22.1,"male",0 +1,34.55,33.8,"male",1 +7,22.05,30,"female",0 +5,36.53,39.7,"female",0 +1,10.41,21,"male",0 +3,34.72,42.1,"male",0 +8,37.08,36,"female",1 +3,42.07,49.9,"female",0 +1,25.46,36.4,"female",0 +4,5.18,23.8,"male",0 +1,29.54,37.9,"male",0 +1,25.95,32.7,"male",0 +1,48.83,43.2,"male",0 +3,31.1,39.5,"female",0 +1,46.07,47.5,"female",0 +1,36.65,41.7,"male",0 +1,34.69,44.8,"female",0 +1,41.73,40.1,"male",0 +1,20.3,34.2,"male",0 +4,39.7,44.8,"male",0 +4,22.87,36.2,"male",0 +1,35.64,40.7,"male",0 +5,34,33.5,"male",0 +5,42.88,46,"female",0 +6,47.37,47.1,"male",1 +2,22.67,38,"male",0 +8,38.96,30,"female",0 +6,44.24,42.7,"male",0 +1,15.8,23.6,"female",1 +1,12.03,23.1,"female",0 +2,26.11,37.1,"male",0 +1,10.68,21.5,"female",0 +2,10.44,31.3,"male",0 +1,40.75,40.3,"male",0 +9,38.63,48.4,"female",0 +4,13.58,24.2,"female",0 +1,18.29,27.7,"male",0 +4,18.12,32.1,"female",0 +2,27.74,42,"female",0 +4,36,35.3,"female",0 +3,29.23,39.5,"male",0 +2,7.82,26.7,"female",0 +1,13.06,23.2,"male",0 +1,28.76,36.8,"female",0 +1,32.52,43.9,"female",0 +5,32.78,41,"female",0 +6,24.82,36.3,"female",0 +5,39.92,41.5,"female",0 +5,20.67,34.3,"female",0 +5,39.09,42,"male",0 +1,40.24,41.3,"male",0 +2,45.57,55.8,"female",0 +2,46.99,48.5,"male",1 +2,47.27,54.4,"male",0 +1,24.31,25.2,"female",0 +2,29.85,42.6,"female",1 +2,43.56,48.3,"male",0 +1,16.63,29.5,"female",0 +1,9.7,23.1,"male",0 +1,47.38,51.7,"female",1 +1,38.17,39,"male",0 +2,12.04,28.9,"female",0 +7,7.75,19.8,"female",0 +1,41.96,45.8,"male",0 +1,44.13,40.6,"male",0 +4,36.9,46.7,"female",0 +4,30.77,36.5,"male",0 +4,31.78,39.5,"female",0 +6,48.76,44.9,"male",1 +4,6.63,22.1,"female",0 +4,24.92,36,"female",0 +2,44.42,46.7,"female",0 +3,13,29.8,"male",0 +2,39.17,44.6,"male",0 +1,13.02,26.5,"male",0 +2,21.34,40.9,"male",0 +1,24.89,35.3,"female",0 
+3,9.75,25.1,"male",0 +3,21.06,32,"male",0 +2,29.47,44.3,"male",0