-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathgen_bin.py
81 lines (69 loc) · 2.35 KB
/
gen_bin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
from os import walk
import csv
import numpy as np
import sys
if sys.version_info[0] == 2:
import cPickle as pickle
else:
import pickle
import struct
import pandas
from utils import readCSV, normalize
# Root directory of the raw data tree. In this file it is only referenced by
# the commented-out ``gen_path_dict(path)`` call in the ``__main__`` section.
path = 'data'
# Inclusive lower and upper bounds that read_spa() applies to the first column
# of a spectrum file; rows whose first-column value falls outside
# [CUT_LEFT, CUT_RIGHT] are dropped.
# NOTE(review): the unit of the first column is not visible here - confirm.
CUT_LEFT = 0.2963
CUT_RIGHT = 0.3069
def gen_path_dict(path, out_path='path_dict.pkl'):
    """Index the ``.spa`` files found under *path* and pickle the index.

    The tree is walked top-down. Only directories that directly contain at
    least 90 files are considered. For such a directory the last two path
    components are parsed as integers: the parent directory name is the
    class and the directory's own name is the sub-folder number. Every file
    in it whose name ends with ``.spa`` is recorded, in sorted order.

    Args:
        path: Root directory to walk.
        out_path: File the pickled index is written to. Defaults to
            ``'path_dict.pkl'`` in the current working directory.

    Returns:
        The index that was pickled: ``{cls: {no: [file_path, ...]}}``.

    Raises:
        ValueError: If a directory with at least 90 files, or its parent,
            does not have an integer name.
    """
    path_dict = {}
    for dirpath, dirnames, filenames in walk(path):
        # Sorting in place makes walk() descend in a deterministic order and
        # keeps every recorded file list sorted by name.
        dirnames.sort()
        filenames.sort()
        if len(filenames) < 90:
            continue
        # Use os.path instead of splitting on '/' so the class / number
        # components are found with any platform path separator.
        parent_dir, leaf_name = os.path.split(os.path.normpath(dirpath))
        cls = int(os.path.basename(parent_dir))
        no = int(leaf_name)
        by_number = path_dict.setdefault(cls, {})
        for fn in filenames:
            if fn.endswith('.spa'):
                by_number.setdefault(no, []).append(os.path.join(dirpath, fn))
    with open(out_path, 'wb') as f:
        pickle.dump(path_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
    return path_dict
def get_path_dict(path):
    """Load and return the object pickled in the file at *path*.

    The file is opened in binary mode and passed to ``pickle.load``.
    Unpickling can execute arbitrary code, so only point this at files
    that come from a trusted source.
    """
    with open(path, 'rb') as pkl_file:
        loaded = pickle.load(pkl_file)
    return loaded
def read_spa(path):
    """Read one spectrum file and return its windowed, normalized last column.

    The file is parsed with ``np.loadtxt`` after skipping its first line.
    Rows whose first-column value lies within ``[CUT_LEFT, CUT_RIGHT]``
    (both bounds inclusive) are kept, and the last column of those rows is
    passed through ``normalize`` from ``utils``.

    NOTE(review): the file is assumed to hold at least two data rows so that
    loadtxt yields a 2-D array - confirm against the data.
    """
    with open(path, 'r') as spa_file:
        table = np.loadtxt(spa_file, skiprows=1)
    axis = table[:, 0]
    # One combined mask selects the same rows as filtering twice in a row.
    in_window = (axis >= CUT_LEFT) & (axis <= CUT_RIGHT)
    return normalize(table[in_window][:, -1])
def gen_label_dict(folder_length=(6, 10, 5, 6)):
    """Build the ``{cls: {subno: label}}`` lookup used when writing records.

    Classes are numbered from 1 in the order given by *folder_length*, and
    the sub-folders of a class are numbered from 1 up to that class's entry.
    Every sub-folder is labelled with the number of the class it belongs to.

    Args:
        folder_length: Number of sub-folders in each class, in class order.
            Defaults to the layout ``(6, 10, 5, 6)``.

    Returns:
        A dict mapping each class number to a dict that maps each of its
        sub-folder numbers to the class number.
    """
    return {
        cls: {subno: cls for subno in range(1, count + 1)}
        for cls, count in enumerate(folder_length, 1)
    }
if __name__ == '__main__':
    # Number of numbered sub-folders under each class folder, in class order.
    folder_length = [6, 10, 5, 6]
    # Uncomment to rebuild 'path_dict.pkl' from the data tree before loading it.
    # gen_path_dict(path)
    path_dict = get_path_dict('path_dict.pkl')
    label_dict = gen_label_dict()
    # Drop the header row and the first column of every remaining row.
    elements = [row[1:] for row in readCSV('data/elements.csv')[1:]]

    # csv.writer needs a binary file on Python 2 but a text file opened with
    # newline='' on Python 3; opening with 'wb' on Python 3 raises TypeError
    # on the first writerow().
    if sys.version_info[0] == 2:
        csv_open_kwargs = {'mode': 'wb'}
    else:
        csv_open_kwargs = {'mode': 'w', 'newline': ''}

    with open('data/original.bin', 'wb') as f, \
            open('data/original.csv', **csv_open_kwargs) as fcsv:
        cw = csv.writer(fcsv)
        cw.writerow(['No', 'Class', 'SubNO', 'Category'] + 'Mn Si Ni Cr V Mo Ti Cu Fe'.split())
        for cls, subfolders in path_dict.items():
            for subno, filenames in subfolders.items():
                # 1-based running sample number across all classes: the
                # sub-folders of earlier classes come first.
                no = sum(folder_length[:cls - 1]) + subno
                label = label_dict[cls][subno]
                sample_elements = elements[no - 1]
                csv_list = [no, cls, subno, label] + sample_elements
                # These two parts are identical for every file of the
                # sub-folder, so build them once.
                label_bytes = struct.pack('<f', label)
                element_values = np.array(sample_elements, np.float32)
                for fn in filenames:
                    cw.writerow(csv_list)
                    spa_array = read_spa(fn)
                    # Binary record layout: label (little-endian float32),
                    # element values (float32), spectrum (float32).
                    f.write(label_bytes)
                    element_values.tofile(f)
                    spa_array.astype('float32').tofile(f)