Skip to content

Commit

Permalink
Update the notes
Browse files Browse the repository at this point in the history
  • Loading branch information
bradduthie committed Sep 15, 2024
1 parent f32fc79 commit c9c3f08
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 123 deletions.
4 changes: 2 additions & 2 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

<meta name="author" content="Brad Duthie" />

<meta name="date" content="2023-09-13" />
<meta name="date" content="2023-09-18" />

<title>On the equivalence of t-tests, anovas, and linear models</title>

Expand Down Expand Up @@ -359,7 +359,7 @@
<h1 class="title toc-ignore">On the equivalence of t-tests, anovas, and
linear models</h1>
<h4 class="author">Brad Duthie</h4>
<h4 class="date">13 September 2023</h4>
<h4 class="date">18 September 2023</h4>

</div>

Expand Down
4 changes: 2 additions & 2 deletions notes.Rmd
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
---
title: "On the equivalence of t-tests, anovas, and linear models"
author: "Brad Duthie"
date: "13 September 2023"
date: "18 September 2023"
output:
word_document: default
pdf_document: default
html_document: default
pdf_document: default
linkcolor: blue
---

Expand Down
Binary file modified notes.docx
Binary file not shown.
Binary file modified notes.pdf
Binary file not shown.
131 changes: 12 additions & 119 deletions scratch.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Load the plant data from two_discrete_x_values.csv (the path is relative to
# the current working directory).
plant_data <- read.csv(file = "two_discrete_x_values.csv");

# Heights of the rows whose species_ID is "species_1".
species_1 <- plant_data$height[plant_data$species_ID == "species_1"];
Expand All @@ -7,60 +6,39 @@ hist(species_1, breaks = 10, col = "blue", xlim = c(0, 300),
main = "", xlab = "Plant height", cex.lab = 1.25,
ylim = c(0, 15));
hist(species_2, breaks = 10, col = "red", add = TRUE)
legend(x = 0, y = 15, fill = c("red", "blue"),
legend = c("species_2", "species_1"));

#legend(x = 0, y = 15, fill = c("red", "blue"),
# legend = c("species_2", "species_1"));


# Pooled (equal-variance) two-sample t statistic, computed by hand from the
# species_1 and species_2 height vectors.
# Sample sizes of the two groups.
n1 <- length(species_1);
n2 <- length(species_2);
# Sample variances of the two groups.
var_y1 <- var(species_1);
var_y2 <- var(species_2);
# Pooled standard deviation: the two variances are weighted by their degrees
# of freedom (n - 1) and divided by the total degrees of freedom (n1 + n2 - 2).
s_p <- sqrt(((n1 - 1)*var_y1 + (n2 - 1)*var_y2)/(n1 + n2 - 2));
# Standard error of the difference between the two means;
# (n1 + n2) / (n1*n2) is the same quantity as 1/n1 + 1/n2.
SEybar <- s_p * sqrt((n1 + n2) / (n1*n2));
# t statistic: difference between the group means divided by its standard error.
tval <- (mean(species_1) - mean(species_2)) / SEybar;


#################
#################
################# Using the t-test function in R and lm
#################
#################
# Two-sample t-test that treats the two variances as equal and therefore uses
# the pooled variance estimate.
t.test(species_1, species_2, var.equal = TRUE);
# The same comparison without the equal-variance assumption (Welch
# approximation to the degrees of freedom).
t.test(species_1, species_2, var.equal = FALSE);

# Use a linear model instead to get the same numbers
# The two formulas below specify the same model: R includes the intercept by
# default, so `1 +` only makes it explicit. The second assignment overwrites
# lmod1 with an equivalent fit.
lmod1 <- lm(height ~ 1 + species_ID, data = plant_data);
lmod1 <- lm(height ~ species_ID, data = plant_data);
# Print the coefficient table. When species_ID has two levels, the t value of
# the species_ID coefficient has the same magnitude as the pooled-variance
# t statistic.
summary(lmod1);









#################
#################
################# How R sees the linear model and t-test
#################
#################
# Dummy-code the species: 1 where species_ID is "species_2", 0 on every other
# row.
is_species_2 <- as.numeric(plant_data$species_ID == "species_2");
# Copy of the height column under a more descriptive name.
plant_height <- plant_data$height;
# Two-column table pairing each height with its 0/1 species indicator.
lm_table_eg <- data.frame(plant_height, is_species_2);
















#################
#################
################# Thinking about this in a plot
#################
#################
plot(x = lm_table_eg$is_species_2, y = lm_table_eg$plant_height,
ylim = c(0, 275), pch = 20, cex.axis = 1.25, cex.lab = 1.25,
ylab = "Plant height", xlab = "Is species 2: Yes (1) or no (0)",
Expand All @@ -75,106 +53,36 @@ points(x = 1, y = summary(lmod1)$coefficients[1,1] +
summary(lmod1)$coefficients[2,1], pch = 18, col = "orange", cex = 3);















#################
#################
################# Analysis of variance with two categories
#################
#################
# One-way analysis of variance of height across the groups in species_ID.
aov_1 <- aov(height ~ species_ID, data = plant_data);
# Print the ANOVA table for the fit.
summary(aov_1);



















#################
#################
################# What happens when we have three categories? ANOVA
#################
#################
# Replace plant_data with the contents of three_discrete_x_values.csv; from
# this line on, plant_data no longer refers to the data set loaded earlier.
plant_data <- read.csv(file = "three_discrete_x_values.csv");
# One-way analysis of variance of height on species_ID for the newly loaded
# data.
aov_2 <- aov(height ~ species_ID, data = plant_data);
# Print the ANOVA table for the fit.
summary(aov_2);
















#################
#################
################# Three categories with a linear model
#################
#################
# Linear model of height on species_ID with the intercept written explicitly
# (`1 +`); R would include it by default.
lmod2 <- lm(height ~ 1 + species_ID, data = plant_data);
# Print the coefficient table for the fit.
summary(lmod2);

# Can we check out the mean values?
# Mean height within each species_ID group, for comparison with the
# coefficients printed by summary(lmod2).
tapply(X = plant_data$height, INDEX = plant_data$species_ID,
FUN = mean);





















#################
#################
################# What's really going on with 3 groups?
#################
#################
# Hand-built predictor columns for the three-group linear model.
# Intercept column: a 1 for every observation. The argument name length.out
# is written in full so the call does not depend on partial argument matching.
the_intercept <- rep(x = 1, length.out = length(plant_data$height));
# Indicator columns: 1 where the row's species_ID equals the named species,
# 0 otherwise.
is_species_2 <- as.numeric(plant_data$species_ID == "species_2");
is_species_3 <- as.numeric(plant_data$species_ID == "species_3");
Expand All @@ -188,24 +96,9 @@ head(lm_table_eg);
















#################
#################
################# Even more elegant, using matrices!
#################
#################
# Response: column 1 of lm_table_eg, converted to a matrix.
Y <- as.matrix(lm_table_eg[,1]);
# Predictors: columns 2 to 4 of lm_table_eg, converted to a matrix.
# NOTE(review): presumably the intercept column and the two species
# indicators -- confirm against the code that builds lm_table_eg.
X <- as.matrix(lm_table_eg[,2:4]);

Expand Down
28 changes: 28 additions & 0 deletions slides.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
title: "Mendelian genetics"
output:
beamer_presentation:
theme: "default"
colortheme: "default"
fonttheme: "default"
ioslides_presentation: default
slidy_presentation: default
header-includes:
- \usepackage{hyperref}
- \usepackage{caption}
- \usepackage[normalem]{ulem}
- \definecolor{links}{HTML}{2A1B81}
- \hypersetup{colorlinks,linkcolor=,urlcolor=links}
colorlinks: true
linkcolor: blue
urlcolor: blue
---

```{r, echo = FALSE}
# Attach knitr, then set echo = FALSE as the default chunk option so that the
# source code of subsequent chunks is not shown in the rendered output.
library(knitr);
opts_chunk$set(echo = FALSE);
```


## How is variation inherited from parent to offspring?

0 comments on commit c9c3f08

Please sign in to comment.