-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrolling_var_and_mean.m
194 lines (175 loc) · 8.51 KB
/
rolling_var_and_mean.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
% """
% Rich Hakim 2020
% Most of this code uses a version of Welford's algorithm and
% was adapted from some Python code I found here:
% http://www.taylortree.com/2010/11/running-variance.html
% and here:
% http://www.taylortree.com/2010/06/running-simple-moving-average-sma.html
% and here:
% http://subluminal.wordpress.com/2008/07/31/running-standard-deviations/
%
% The purpose of this script is to compute very fast rolling variances
% and means in a stepwise way. This allows for online processing of
% incoming data. It uses a recursive algorithm to calculate online
% updates to the current std and mean.
% The only challenge is setting up a rolling buffer of values, which must
% be done outside of this class. See the demo script for an example.
% Note: I haven't figured out how to do efficient setting of large array
% values within objects in Matlab, yet. If you know how to do it, please
% get in touch or send me a pull request!
% RUNNING (accumulating) vs. ROLLING (windowed):
% In order to use it as a running (accumulating from the first index)
% average or variance, just set win_size to be inf and vals_old to []. This
% method can be used to calculate normal variances and means of arrays that
% are too big to fit into memory!
% ARBITRARY DIMENSIONS:
% This script allows for an arbitrary number of dimensions to be used, but
% the rolling dimension must be dim 1.
classdef rolling_var_and_mean
properties
varSum_old = [];
mean_old = [];
win_size = 3;
dim_sizes_slices = [1];
% vals_rolling = nan([win_size, dim_sizes_slices]);
vals_rolling = nan([3, 1]);
otherdims = repmat({':'},1,numel(1)-1);
end
methods
% SETUP
function obj = set_key_properties(obj, dim_sizes , win_size)
% """
% This function sets the key properties needed to starting using the step function and other functions
% dim_sizes = sizes of dimensions in input data. The first dimension doesnt really matter since it is the rolling dimension, and it can be set to 1 or anything.
% win_size = this is the size of the rolling window
% """
obj.dim_sizes_slices = dim_sizes(2:end);
% obj.vals_rolling = nan([win_size , obj.dim_sizes_slices]);
obj.otherdims = repmat({':'},1,numel(obj.dim_sizes_slices));
obj.win_size = win_size;
end
% MAIN STEP FUNCTION:
function [obj , mean_new , var_new] = step(obj, idx_new, vals_new , vals_old)
% idx_rolling_new = mod(idx_new-1 , obj.win_size)+1;
% vals_old = obj.vals_rolling(idx_rolling_new , obj.otherdims{:});
% obj.vals_rolling(idx_rolling_new , obj.otherdims{:}) = vals;
% vals_new = obj.vals_rolling(idx_rolling_new , obj.otherdims{:});
mean_new = obj.update_mean(idx_new, vals_new, vals_old, obj.win_size, obj.mean_old);
varSum_new = obj.update_varSum(idx_new, vals_new, vals_old, obj.win_size, obj.varSum_old);
var_new = obj.varSum_to_var(idx_new, obj.win_size, mean_new, varSum_new, obj.dim_sizes_slices);
obj.mean_old = mean_new;
obj.varSum_old = varSum_new;
end
% Code for the Running or Windowed Variance:
function varSum_new = update_varSum(obj, idx_new, vals_new, vals_old, win_size, varSum_old)
% """
% Returns the power sum average based on the blog post from
% Subliminal Messages. Use the power sum average to help derive the running
% variance.
% sources: http://subluminal.wordpress.com/2008/07/31/running-standard-deviations/
%
% Keyword arguments:
% idx_new -- current index or location of the value in the vals
% vals -- list or tuple of data to average
% win_size -- number of values to include in average
% varSum_old -- previous update_varSum (n - 1) of the vals.
% """
if win_size < 1
warning ValueError("win_size must be 1 or greater")
end
if idx_new < 1
idx_new = 1;
end
if isempty(varSum_old)
if idx_new > 1
warning ValueError("varSum_old of NaN when idx_new > 1")
end
varSum_old = 0.0;
end
vals_new = double(vals_new);
if idx_new <= win_size
% CODE MEAT
varSum_new = varSum_old + (vals_new .* vals_new - varSum_old) ./ (idx_new);
else
vals_old = double(vals_old);
% CODE MEAT
varSum_new = varSum_old + (((vals_new .* vals_new) - (vals_old .* vals_old)) ./ win_size);
end
end
% Code for converting varSum to var
function var_new = varSum_to_var(obj, idx_new, win_size, mean_current, varSum, dim_sizes_slices)
% """
% Returns the running variance based on a given time win_size.
% sources: http://subluminal.wordpress.com/2008/07/31/running-standard-deviations/
%
% Keyword arguments:
% idx_new -- current index or location of the value in the vals
% vals -- list or tuple of data to average
% mean_current -- current average of the given win_size
% varSum -- current varSum of the given win_size
% """
if win_size < 1
warning ValueError("win_size must be 1 or greater")
end
if idx_new <= 1
var_new = zeros([1 , dim_sizes_slices]);
else
if isempty(mean_current)
warning ValueError("mean_current of None invalid when idx_new > 0")
end
if isempty(varSum)
warning ValueError("varSum of None invalid when idx_new > 0")
end
if idx_new >= win_size
windowsize = win_size;
else
windowsize = idx_new;
end
% CODE MEAT
var_new = (varSum .* windowsize - windowsize .* mean_current .* mean_current) ./ (windowsize-1);
end
end
%%
% Code for the Running Mean:
function mean_new = running_mean(obj, idx_new, vals, mean_old)
% """
% Returns the cumulative or unweighted simple moving average.
% Avoids sum of vals per call.
%
% Keyword arguments:
% idx_new -- current index or location of the value in the vals
% vals -- list or tuple of data to average
% mean_old -- previous average (n - 1) of the vals.
% """
if idx_new <= 1
mean_new = vals;
else
% CODE MEAT
mean_new = mean_old + ((vals - mean_old) / (idx_new));
end
end
% Code for the Windowed mean:
function mean_new = update_mean(obj, idx_new, vals_new, vals_old, win_size, mean_old)
% """
% Returns the running simple moving average - avoids sum of vals per call.
%
% Keyword arguments:
% idx_new -- current index or location of the value in the vals
% vals -- list or tuple of data to average
% win_size -- number of values to include in average
% mean_old -- previous simple moving average (n - 1) of the vals
% """
if win_size < 1
warning ValueError("win_size must be 1 or greater")
end
if idx_new <= 1
mean_new = vals_new;
elseif idx_new <= win_size
mean_new = obj.running_mean(idx_new, vals_new, mean_old);
else
% CODE MEAT
mean_new = mean_old + ((vals_new - vals_old) ./ double(win_size));
end
end
end
end