---
title: "Intro to Bayesian Inference in R"
execute:
  cache: true
---
## Example: Flipping a Coin
Suppose we flip a coin a set number of times and count the number of times a head appears. In this first example, we **don't** have any strong belief about the probability that a head will appear. In other words, any probability between 0 and 1 is equally likely. Based on this prior knowledge (or lack thereof), we can visualize the prior distribution of potential probabilities by drawing samples from a uniform distribution between 0 and 1.
```{r}
hist(runif(10000, 0, 1), breaks = 30, freq = FALSE, main = "Prior Distribution",
     xlab = "Probability of Heads", ylab = "Density", xlim = c(0, 1))
```
Using this prior knowledge and the binomial likelihood of our data, we can use the Metropolis-Hastings algorithm to generate samples from the posterior distribution.
```{r}
coin_model_unif <- function(n_flips){
  # Simulate coin flips (0 for tails, 1 for heads)
  set.seed(5)
  coin_flips <- sample(c(0, 1), n_flips, replace = TRUE)

  # Likelihood function: Binomial likelihood (on the log scale)
  likelihood <- function(p) {
    sum(dbinom(coin_flips, size = 1, prob = p, log = TRUE))
  }

  # Prior distribution: Uniform prior (on the log scale)
  prior <- function(p) {
    dunif(p, 0, 1, log = TRUE)
  }

  # Posterior: Proportional to the product of prior and likelihood
  # (a sum on the log scale)
  posterior <- function(p) {
    prior(p) + likelihood(p)
  }

  # Metropolis-Hastings MCMC algorithm
  metropolis_hastings <- function() {
    samples <- numeric(10000)
    current_sample <- runif(1) # Initialize at a random value between 0 and 1
    for (i in 1:10000) {
      # Propose a new value from a truncated normal distribution
      proposed_sample <- truncnorm::rtruncnorm(1, a = 0, b = 1,
                                               mean = current_sample, sd = 0.1)
      # Calculate acceptance probability, including the Hastings correction
      # for the asymmetric truncated-normal proposal
      acceptance_prob <- exp(posterior(proposed_sample) -
                               posterior(current_sample)) *
        truncnorm::dtruncnorm(current_sample, a = 0, b = 1,
                              mean = proposed_sample, sd = 0.1) /
        truncnorm::dtruncnorm(proposed_sample, a = 0, b = 1,
                              mean = current_sample, sd = 0.1)
      # Accept or reject proposal
      if (runif(1) < acceptance_prob) {
        current_sample <- proposed_sample
      }
      samples[i] <- current_sample
    }
    return(samples)
  }

  # Run Metropolis-Hastings MCMC
  posterior_samples <- metropolis_hastings()

  # Plot posterior distribution with the observed frequency of heads overlaid
  hist(posterior_samples, breaks = 30, freq = FALSE,
       main = "Posterior Distribution",
       xlab = "Probability of Heads", ylab = "Density", xlim = c(0, 1))
  observed_frequency <- sum(coin_flips) / n_flips
  abline(v = observed_frequency, col = "red", lwd = 2)
  legend("topright", legend = "Observed Frequency of Heads", col = "red", lty = 1,
         lwd = 2)
}
coin_model_unif(10)
coin_model_unif(100)
```
When we don't have any prior information, Bayesian inference resembles classical inference: with a flat prior, the posterior is proportional to the likelihood, so the estimates above can also be obtained by maximum likelihood.
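As a quick check, here is a minimal sketch (re-simulating the same 10 flips with `set.seed(5)` as in the function above) that maximizes the binomial log-likelihood numerically and compares the result to the observed frequency of heads.
```{r}
# Re-simulate the 10 coin flips used in coin_model_unif(10)
set.seed(5)
coin_flips <- sample(c(0, 1), 10, replace = TRUE)

# Binomial log-likelihood as a function of the probability of heads
log_lik <- function(p) sum(dbinom(coin_flips, size = 1, prob = p, log = TRUE))

# Numerical maximum likelihood estimate vs. the observed frequency of heads
optimize(log_lik, interval = c(0, 1), maximum = TRUE)$maximum
mean(coin_flips)
```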
If we go into this problem with some prior knowledge about the probability of flipping heads, that knowledge will influence our posterior distribution. If we are fairly confident that the probability is 50% but want to leave some room for the possibility that the coin is biased, we might specify a prior distribution that looks like this (a Beta(20, 20) distribution centered at 0.5).
```{r}
hist(rbeta(10000, 20, 20), breaks = 30, freq = FALSE, main = "Prior Distribution",
     xlab = "Probability of Heads", ylab = "Density", xlim = c(0, 1))
```
Now we can repeat the steps from earlier to generate samples from the posterior distribution.
```{r}
coin_model_beta <- function(n_flips){
  # Simulate coin flips (0 for tails, 1 for heads)
  set.seed(5)
  coin_flips <- sample(c(0, 1), n_flips, replace = TRUE)

  # Likelihood function: Binomial likelihood (on the log scale)
  likelihood <- function(p) {
    sum(dbinom(coin_flips, size = 1, prob = p, log = TRUE))
  }

  # Prior distribution: Beta prior centered on 0.5 (on the log scale)
  prior <- function(p) {
    dbeta(p, 20, 20, log = TRUE)
  }

  # Posterior: Proportional to the product of prior and likelihood
  # (a sum on the log scale)
  posterior <- function(p) {
    prior(p) + likelihood(p)
  }

  # Metropolis-Hastings MCMC algorithm
  metropolis_hastings <- function() {
    samples <- numeric(10000)
    current_sample <- runif(1) # Initialize at a random value between 0 and 1
    for (i in 1:10000) {
      # Propose a new value from a truncated normal distribution
      proposed_sample <- truncnorm::rtruncnorm(1, a = 0, b = 1,
                                               mean = current_sample, sd = 0.1)
      # Calculate acceptance probability, including the Hastings correction
      # for the asymmetric truncated-normal proposal
      acceptance_prob <- exp(posterior(proposed_sample) -
                               posterior(current_sample)) *
        truncnorm::dtruncnorm(current_sample, a = 0, b = 1,
                              mean = proposed_sample, sd = 0.1) /
        truncnorm::dtruncnorm(proposed_sample, a = 0, b = 1,
                              mean = current_sample, sd = 0.1)
      # Accept or reject proposal
      if (runif(1) < acceptance_prob) {
        current_sample <- proposed_sample
      }
      samples[i] <- current_sample
    }
    return(samples)
  }

  # Run Metropolis-Hastings MCMC
  posterior_samples <- metropolis_hastings()

  # Plot posterior distribution with the observed frequency of heads overlaid
  hist(posterior_samples, breaks = 30, freq = FALSE,
       main = "Posterior Distribution",
       xlab = "Probability of Heads", ylab = "Density", xlim = c(0, 1))
  observed_frequency <- sum(coin_flips) / n_flips
  abline(v = observed_frequency, col = "red", lwd = 2)
  legend("topright", legend = "Observed Frequency of Heads", col = "red", lty = 1,
         lwd = 2)
}
coin_model_beta(10)
coin_model_beta(100)
```
We can see that the prior has a lot of influence on the posterior distribution when we only flip the coin 10 times, but it is less influential when we have more data and the observed frequency is closer to 50%.
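As a sanity check on the MCMC output, note that the Beta prior is conjugate to the binomial likelihood, so the posterior is also available in closed form: Beta(20 + heads, 20 + tails). A minimal sketch for the 10-flip case, re-simulating the same flips as above:
```{r}
# Reproduce the 10 simulated flips
set.seed(5)
coin_flips <- sample(c(0, 1), 10, replace = TRUE)
heads <- sum(coin_flips)

# Analytical posterior under the Beta(20, 20) prior
curve(dbeta(x, 20 + heads, 20 + (10 - heads)), from = 0, to = 1,
      main = "Analytical Posterior (Beta-Binomial Conjugacy)",
      xlab = "Probability of Heads", ylab = "Density")
```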
## Modelling Salamander Larvae Abundance
```{r}
#| output: false
library(tidyverse)
library(here)
library(brms)
```
For this example, we are going to model the relative abundance of blue-spotted salamander larvae in wetlands. We are interested in the effects of canopy cover, day of year, year, and study site. We will use a Poisson distribution with a log link to model counts of larvae in wetlands, with an offset term to account for differences in the number of samples per wetland and the size of the wetland relative to maximum inundation. We will start with a frequentist approach using the **glm()** function in R.
```{r}
vp_amphibs <- readRDS(here("data/vp_amphibs.rds")) %>%
  mutate(year = factor(year)) %>%
  rename(site = sites)

vp_AL <- vp_amphibs %>%
  filter(species == "BS") %>%
  rowid_to_column()

mod_freq <- glm(count ~
                  scale(jday) +
                  scale(canopy) +
                  year +
                  site +
                  offset(log(samples / proparea)),
                data = vp_AL,
                family = "poisson")

summary(mod_freq)
```
Most of these parameter estimates seem reasonable, but the effect size and standard error for the site GB8 variable are quite large. If we look at the total counts of larvae for each site, we can see why.
```{r}
vp_AL %>%
  group_by(site) %>%
  summarise(total_count = sum(count)) %>%
  ggplot(aes(x = site, y = total_count)) +
  geom_bar(stat = "identity") +
  theme_bw()
```
This is what is known as complete separation: the outcome is (seemingly) perfectly predicted by a predictor. Site GB8 has only two wetlands, and blue-spotted salamander larvae were found in neither, so the maximum likelihood estimate for its coefficient drifts toward negative infinity. Complete separation isn't always an issue, but it can lead to errors in other parameter estimates and make model comparison difficult.
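A minimal sketch with made-up counts (hypothetical data, not from the study) shows the same symptom in isolation: every count in group "b" is zero, so the estimated coefficient for "b" is pushed toward a large negative value with an enormous standard error.
```{r}
# Hypothetical toy data: group "b" has nothing but zero counts
toy <- data.frame(count = c(3, 1, 4, 0, 0, 0),
                  group = rep(c("a", "b"), each = 3))

# The estimate for group "b" drifts toward -Inf and its standard error explodes
summary(glm(count ~ group, data = toy, family = "poisson"))
```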
Using a Bayesian approach, we can use weakly informative priors to make sure our parameter estimates stay realistic.
```{r}
prior <- brms::set_prior("normal(0,1)", class = "b")
# Plot normal distribution
tibble(x = seq(-4, 4, length.out = 1000)) %>%
  mutate(y = dnorm(x, 0, 1)) %>%
  ggplot(aes(x = x, y = y)) +
  geom_line() + # Plot the normal distribution curve
  labs(title = "Normal Distribution",
       x = "x",
       y = "Density") +
  theme_bw()
```
**What does this prior mean?** Take site GB8 as an example: the GB8 indicator takes a value of 1 if the site is GB8 and 0 otherwise. Based on the prior distribution above, we think the effect has a \~95% chance of being between -2 and 2. Since the expected count of salamander larvae is modelled with a log link, this range of effects corresponds to multiplying the expected count by anything from exp(-2) ≈ 0.14 to exp(2) ≈ 7.4, which seems reasonable for this system.
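The arithmetic behind that statement can be checked directly:
```{r}
# ~95% of the normal(0, 1) prior mass lies between roughly -2 and 2
qnorm(c(0.025, 0.975), mean = 0, sd = 1)

# On the count scale (log link), that corresponds to multiplicative effects of about
exp(c(-2, 2))
```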
If we have a lot of data for GB8, the prior won't have a huge impact on our posterior distribution. If we don't have a lot of data, then the prior will make sure we don't explore unreasonable values for the posterior.
We will now use the **brm()** function to specify our model. The formula syntax is similar to glm(), but we now supply some extra information about the prior and the sampler.
```{r}
mod_bayes <- brms::brm(count ~
                         scale(jday) +
                         scale(canopy) +
                         year +
                         site +
                         offset(log(samples / proparea)),
                       data = vp_AL,
                       family = "poisson",
                       prior = prior, ### our normal(0,1) prior
                       iter = 2000,   ### total iterations per chain, including warmup
                       warmup = 1000, ### iterations discarded from the start of each chain
                       chains = 2,    ### number of independent MCMC chains
                       cores = 2)     ### number of processor cores to use
```
Now we can compare parameter estimates between the frequentist and Bayesian approaches.
```{r}
# Extract parameter estimates
glm_coefs <- broom::tidy(mod_freq) %>%
  mutate(type = "freq") %>%
  select(term, estimate, std.error, type)

brms_coefs <- as.data.frame(fixef(mod_bayes)) %>%
  rename(estimate = Estimate,
         std.error = Est.Error) %>%
  mutate(term = glm_coefs$term,
         type = "bayes") %>%
  select(term, estimate, std.error, type)

rbind(glm_coefs, brms_coefs) %>%
  mutate(estimate = round(estimate, 2),
         std.error = round(std.error, 2)) %>%
  pivot_wider(names_from = "type", values_from = c("estimate", "std.error")) %>%
  select(term, estimate_freq, std.error_freq,
         estimate_bayes, std.error_bayes) %>%
  kableExtra::kbl(caption = "Comparison of Model Parameters",
                  col.names = c("Term", "Estimate", "Std. Error", "Estimate", "Std. Error")) %>%
  kableExtra::add_header_above(c("", "Frequentist" = 2, "Bayesian" = 2))
```
The parameter estimates and standard errors are similar between the two approaches, but the estimate and standard error for site GB8 are much smaller in magnitude under the Bayesian model.
Lastly, we'll explore some diagnostics:
```{r}
summary(mod_bayes)
```
```{r}
plot(mod_bayes, variable = c("b_scalecanopy", "b_siteGB8"))
```