-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollate.py
54 lines (40 loc) · 1.77 KB
/
collate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def _write_batch(data_frame, timestamp, output_directory):
    """Write one collated batch to ``<output_directory>/<date>.csv``.

    The file name is the part of *timestamp* before the first space, with
    every "/" replaced by "v" (presumably so that a date such as
    ``01/02/2023`` can be used as a file name).
    """
    date_part = timestamp.split(" ")[0]
    batch_name = "v".join(date_part.split("/"))
    data_frame.to_csv(output_directory / (batch_name + ".csv"))


def reduce(input_files, output_files):
    """Collate many per-timestamp CSV files into wide station tables.

    Every file returned by ``get_names(input_files)`` is read from the
    ``input_files`` folder next to this script.  From each file:

    * column 5 (zero-based), without its first and last rows, is treated as
      the station names;
    * column 6, sliced the same way, is treated as the matching readings;
    * row 2 of column 4 is treated as the timestamp of the whole file.

    Each file becomes one row (indexed by its timestamp) of a table with one
    column per station; stations missing from a file are left at 0.  The
    table is written to the ``output_files`` folder next to this script
    every 301 files, and once more at the end for any remaining files.

    Args:
        input_files: Name of the folder (relative to this script) that holds
            the input CSV files.
        output_files: Name of the folder (relative to this script) that
            receives the collated CSV files.  It is created if missing and
            reused if it already exists.

    Returns:
        None.  The results are the CSV files written to ``output_files``.

    Raises:
        IndexError: If an input file has fewer than three data rows or fewer
            than seven columns.
    """
    import warnings
    from pathlib import Path

    import numpy as np
    import pandas as pd

    from file_names import get_names

    # Resolve everything against the folder that holds this script rather
    # than the current working directory, so the script can still run on
    # PythonAnywhere.
    this_folder = Path(__file__).parent.resolve()
    input_directory = this_folder / input_files
    output_directory = this_folder / output_files
    # exist_ok: re-running the collation must not fail on an existing folder.
    output_directory.mkdir(parents=True, exist_ok=True)

    # Adding columns one at a time fragments the frame; pandas warns about it.
    warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

    max_files_per_batch = 301
    stations = []
    files_in_batch = 0
    timestamp = None
    data_frame = pd.DataFrame()

    for file_name in get_names(input_files):
        data = pd.read_csv(input_directory / file_name)
        readings = data.iloc[:, 6].to_list()[1:-1]
        these_stations = data.iloc[:, 5].to_list()[1:-1]

        # A station seen for the first time gets a zero-filled column that
        # covers every row already in the batch.  The row count comes from
        # the frame itself so a repeated timestamp cannot cause a mismatch.
        for station in these_stations:
            if station not in stations:
                data_frame[station] = np.zeros(len(data_frame.index))
                stations.append(station)

        timestamp = data.iloc[:, 4].to_list()[2]
        # Start the row at zero so stations absent from this file stay 0.
        data_frame.loc[timestamp] = np.zeros(len(stations))
        for station, reading in zip(these_stations, readings):
            data_frame.loc[timestamp, station] = reading

        files_in_batch += 1
        if files_in_batch >= max_files_per_batch:
            _write_batch(data_frame, timestamp, output_directory)
            data_frame = pd.DataFrame()
            stations = []
            files_in_batch = 0

    # Flush the files left over after the last full batch; without this a
    # run of fewer than 301 files would produce no output at all.
    if files_in_batch:
        _write_batch(data_frame, timestamp, output_directory)