forked from markschelbergen/wind-profile-clustering
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_data.py
85 lines (63 loc) · 3.5 KB
/
preprocess_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import numpy as np
from copy import copy
ref_vector_height = 100.
def express_profiles_wrt_ref_vector(data):
data['wind_direction'] = np.arctan2(data['wind_speed_north'], data['wind_speed_east']) # CCW w.r.t. East
# TODO: check if interpolation can be done without the loop
wind_speed_ref = np.zeros(data['n_samples'])
ref_dir = np.zeros(data['n_samples'])
for i in range(data['n_samples']):
wind_speed_ref[i] = np.interp(ref_vector_height, data['altitude'], data['wind_speed'][i, :])
wind_speed_east_ref = np.interp(ref_vector_height, data['altitude'], data['wind_speed_east'][i, :])
wind_speed_north_ref = np.interp(ref_vector_height, data['altitude'], data['wind_speed_north'][i, :])
ref_dir[i] = np.arctan2(wind_speed_north_ref, wind_speed_east_ref)
data['reference_vector_speed'] = wind_speed_ref
data['reference_vector_direction'] = ref_dir
# Express wind direction with respect to the reference vector.
data['wind_direction'] = data['wind_direction'] - ref_dir.reshape((-1, 1))
# Modify values such that angles are -pi < dir < pi.
data['wind_direction'] = np.where(data['wind_direction'] < -np.pi, data['wind_direction'] + 2*np.pi,
data['wind_direction'])
data['wind_direction'] = np.where(data['wind_direction'] > np.pi, data['wind_direction'] - 2*np.pi,
data['wind_direction'])
data['wind_speed_parallel'] = data['wind_speed_east']*np.cos(ref_dir).reshape((-1, 1)) + \
data['wind_speed_north']*np.sin(ref_dir).reshape((-1, 1))
data['wind_speed_perpendicular'] = -data['wind_speed_east']*np.sin(ref_dir).reshape((-1, 1)) + \
data['wind_speed_north']*np.cos(ref_dir).reshape((-1, 1))
return data
def reduce_wind_data(data, mask_keep):
n_samples_after_filter = np.sum(mask_keep)
print("{:.1f}% of data/{} samples remain after filtering.".format(n_samples_after_filter/data['n_samples'] * 100.,
n_samples_after_filter))
for k, val in data.items():
if k in ['altitude', 'n_samples', 'n_locs', 'years']:
continue
else:
data[k] = val[mask_keep]
data['n_samples'] = n_samples_after_filter
return data
def remove_lt_mean_wind_speed_value(data, min_mean_wind_speed):
sample_mean_wind_speed = np.mean(data['wind_speed'], axis=1)
mask_keep = sample_mean_wind_speed > min_mean_wind_speed
data = reduce_wind_data(data, mask_keep)
return data
def normalize_data(data):
norm_ref = np.percentile(data['wind_speed'], 90., axis=1).reshape((-1, 1))
training_data_prl = data['wind_speed_parallel']/norm_ref
training_data_prp = data['wind_speed_perpendicular']/norm_ref
data['training_data'] = np.concatenate((training_data_prl, training_data_prp), 1)
data['normalisation_value'] = norm_ref.reshape(-1)
return data
def preprocess_data(data, remove_low_wind_samples=True, return_copy=True):
if return_copy:
data = copy(data)
data['wind_speed'] = (data['wind_speed_east']**2 + data['wind_speed_north']**2)**.5
if remove_low_wind_samples:
data = remove_lt_mean_wind_speed_value(data, 5.)
data = express_profiles_wrt_ref_vector(data)
data = normalize_data(data)
return data
if __name__ == '__main__':
from read_data.dowa import read_data
wind_data = read_data({'i_lat': 110, 'i_lon': 55})
preprocess_data(wind_data)