-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdrift-diffusion-sim-data-v2.R
227 lines (180 loc) · 8.93 KB
/
drift-diffusion-sim-data-v2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# Simulate choice RT data using drift diffusion model (quick and dirty method)
# written K. Garner, Feb 2019, free to share and use
# based on matlab code by David Sewell
rm(list = ls())
# code structure:
# 1. install packages (if necessary), load packages
# 2. define diffusion model functions - one for single accumulator model, one for two independent accumulators
# 3. set intital parameters and run
# 4. plot output RTs as histograms
#--------------------------------------------------------------------------
# install packages (if necessary), load packages
# install required packages
# install.packages("ggplot2")
library(ggplot2)
library(tidyverse)
#--------------------------------------------------------------------------
# DEFINE FUNCTIONS
#--------------------------------------------------------------------------
generate_rts_w_single_drift <- function(drift_rate, boundary, non_decision_time, start_point, noise_sdev, time_step, nTrials, t0){
# use Euler's method to generate sample paths of the diffusion process
# each trial of the simulation is constructed as a loop that will generate simulated
# sample paths (i.e. accumulations of evidence). This loop is then applied nTrials times
get.trials <- function(start_point, t0, boundary, non_decision_time, drift_rate, time_step, noise_sdev){
# the following loop controls the evidence accumulation process. At each time_step,
# the current evidence total (T_evidene) is compared againt the value of the two absorbing
# boundaries (boundary and 0). Sample steps occur until the evidence total crosses
# one of the boundaries. Each sample of info changes the evidence total by
# an amount that is fixed across time steps (drift_rate * time_steps),
# plus a scaled random draw from a normal distribution with mean 0 and sd noise_sdev
# (a value of 0.1 or 1 are often used). Once
# evidence total crosses a boundary, the time steps and the boundary crossed are
# recorded, the non-decision time is then added to the time steps to make the predicted RT
# initialise evidence
T_evidence = start_point * boundary
t = t0
repeat{
T_evidence = T_evidence + drift_rate*time_step + rnorm(1, mean=0, sd = noise_sdev)*sqrt(time_step)
t = t + time_step
if (T_evidence > boundary | T_evidence < 0){
break
}
}
# a threshold has been reached (hence termination of while loop, so assign response and get RT)
if ( T_evidence > boundary )
resp = 1
else if ( T_evidence < 0 ){
resp = 0
}
RT = t + non_decision_time
out = data.frame(resp = resp, RT = RT) # assign the variables to a list
out # output the list
}
# initialise a vector to collect responses
start_point = rep(start_point, times = nTrials)
tmp = lapply(start_point, get.trials, t0 = t0, boundary = boundary,
non_decision_time = non_decision_time, drift_rate = drift_rate,
time_step = time_step, noise_sdev = noise_sdev) # apply get.trials function nTrials times
data = do.call(rbind, tmp) # make into a nice data frame
data
}
# GENERATE DATA FROM SINGLE DRIFT MODEL
#--------------------------------------------------------------------------
# first, set the values for the core parameters in the diffusion model (with single drift implementation):
drift_rate = 0.28 # drift rate
boundary = .13 # boundary separation
non_decision_time = 0.12 # non-decision time
start_point = 0.45 # start point of evidence (0 = lower boundary, 1 = upper boundary, 0.5 = halfway between boundaries)
# next, set the standard deviation of within trial noise, traditionally set to 0.1
# (Ratcliff, 1978), but others set noise_sdev = 1. Either is fine, as this parameter “scales” the
# other parameters in the model (i.e., diffusion model parameters are only defined on a
# ratio scale, and so you can get the same predictions from different combinations of
# parameter values---if one parameter were to be mulitplied by a constant, the same
# predictions would obtain if all other parameters were multiplied (or divided) by the same value).
noise_sdev = 0.1 # diffusion coefficient
time_step = .001 # Time steps in seconds
# set the number of trials
nTrials = 10000
t0 = 0 # time zero
#
# Start the clock! (if you want to time, uncomment line below and beneath 'Stop the clock')
# ptm <- proc.time()
predicted.data = generate_rts_w_single_drift(drift_rate = drift_rate, boundary = boundary,
non_decision_time = non_decision_time,
start_point = start_point, noise_sdev = noise_sdev,
time_step = time_step, nTrials = nTrials, t0=t0)
# Stop the clock
# proc.time() - ptm
# _____________________________________________________________________________________________________
# load the previously made distribution
load("observed_data.RData")
# PLOT OUTPUT DATAs AS HISTOGRAMS
plot.observed_vs_predicted_RTs <- function(observed, predicted){
par(mfrow=c(2,1), mar=c(3, 3, 1, 1))
get.plot <- function(observed, predicted, response){
tmp = with(predicted, density(RT[resp==response]))
with(observed, hist(RT[resp==response], prob=TRUE, main = paste("resp = ", response, sep=""),
xlab="RT", xlim=c(0,1), ylim=c(0,max(tmp$y)), col=scales::alpha('skyblue',.5), breaks = 100))
with(predicted, lines(density(RT[resp==response]), col="#F24D29", lwd=2))
}
get.plot(observed = observed, predicted = predicted, response = 0)
get.plot(observed = observed, predicted = predicted, response = 1)
}
plot.observed_vs_predicted_RTs(observed.data, predicted.data)
# CALCULATE CHI-SQUARED
#-------------------------------------------------------------------
#source functions needed to calculate chi-squared
source("chi-square-funs.R")
#get response time quantile information for observed data
output1 = data2RTQ(observed.data)
binedge_c = output1[[1]]
binedge_e = output1[[2]]
bindata_c = output1[[3]]
bindata_e = output1[[4]]
#get response time quantile information for data from model
output2 = preds2RTQ(predicted.data, binedge_c, binedge_e)
bindata_pc = output2[[1]]
bindata_pe = output2[[2]]
#
#calculate chi-squared
x2 = chisqFit(data = c(bindata_c,bindata_e),
preds = c(bindata_pc,bindata_pe) / nTrials,
N = nTrials)
x2
# IMPLEMENT OPTIMISATION ALGORITHM TO FIND THE PARAMETERS THAT MINIMISE CHI-SQAURE
#-------------------------------------------------------------------
#Create wrapper function that takes parameters and observed data (and other settings)
#as input arguments and returns chi-square
wrapper4optim = function(parms, data, time_step=.01, nTrials=10000, t0=0){
#unpack paramaters
drift_rate = parms[1]
boundary = parms[2]
non_decision_time = parms[3]
start_point = parms[4]
noise_sdev = 0.1
#Generate predicted data under parameters in question
predicted.data = generate_rts_w_single_drift(drift_rate = drift_rate, boundary = boundary,
non_decision_time = non_decision_time,
start_point = start_point, noise_sdev = noise_sdev,
time_step = time_step, nTrials = nTrials, t0=t0)
#get response time quantile information for observed data
output1 = data2RTQ(observed.data)
binedge_c = output1[[1]]
binedge_e = output1[[2]]
bindata_c = output1[[3]]
bindata_e = output1[[4]]
#get response time quantile information for data from model
output2 = preds2RTQ(predicted.data, binedge_c, binedge_e)
bindata_pc = output2[[1]]
bindata_pe = output2[[2]]
#calculate chi-square
x2 = chisqFit(data = c(bindata_c,bindata_e),
preds = c(bindata_pc,bindata_pe) / nTrials,
N = 10000)
observed.data$source = "observed"
predicted.data$source = "predicted"
plot=bind_rows(observed.data,predicted.data) %>%
ggplot() +
geom_density(aes(x=RT,group=source,colour=source),alpha=0.1) +
facet_grid(.~resp)
print(plot)
print(c(x2,parms))
return(x2)
}
#Specify intiial values for the estimated parameters
init_drift_rate = 0.28 # drift rate
init_boundary = .13 # boundary separation
init_non_decision_time = 0.12 # non-decision time
init_start_point = .45 # starting point for evidence
#Run optimisation algorithm on wrapper function
optim(par = c(init_drift_rate, #starting values for each parameter
init_boundary,
init_non_decision_time,
init_start_point),
fn = wrapper4optim, #function to be minimised
data = observed.data, #data to which predictions are compared
lower = c(-1,0,0.05,0), #lower bound for each parameter
upper = c( 1,1,1, 1), #upper bound for each parameter
control = list(trace=6))
#data generating values:
#drift rate = .28, boundary = 0.13, non decision time = 0.12, start point = 0.45