This repository has been archived by the owner on Oct 8, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsdml_exp_com.py
87 lines (57 loc) · 2.47 KB
/
sdml_exp_com.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def warn(*args, **kwargs):
    """Accept any arguments and do nothing.

    This script assigns the function to ``warnings.warn`` so that calls
    made through that attribute become no-ops.

    Returns:
        None.
    """
    pass
import warnings
# Replace ``warnings.warn`` with the no-op ``warn`` defined above: from here
# on, any code that looks up and calls ``warnings.warn`` does nothing.
# NOTE(review): presumably done before the library imports below so their
# warnings are silenced -- confirm this blanket suppression is intended.
warnings.warn = warn
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from norms import OrigN, OrigS, SpectralNorm, WbysqDist, BinarNorm
import os
import pandas as pd
import numpy as np
from MetricTransform import MetricTransform
from reskit.core import DataTransformer, MatrixTransformer, Pipeliner
from sklearn.feature_selection import VarianceThreshold
from reskit.features import degrees, bag_of_edges, closeness_centrality, betweenness_centrality
from convert import convert
from load_data import load_ucla
from norms import OrigN, OrigS, SpectralNorm, WbysqDist, BinarNorm
# Load the UCLA data; ``load_ucla`` returns three values that are unpacked
# into the matrices, the targets and ``xyz``.
path_ucla = '../../../Connectomics/Autism/Data/'
matrix_ucla, target_ucla, xyz = load_ucla(path_ucla)

# The pipeline input is a dict carrying the matrices under 'data' and the
# ``xyz`` value under 'dist'; the targets are passed separately as ``y``.
X = {'data': matrix_ucla, 'dist': xyz}
y = target_ucla
# Keyword arguments for ``MetricTransform``; they are expanded with
# ``**param_SDML`` where the 'metric' pipeline step is constructed.
param_SDML = dict(
    model='SDML',
    mode='dist_exp',
    alpha=1e-5,
    use_cov=False,
    sparsity=0.01,
    balance=0.5,
)
# Each stage is a list of (name, object) candidates; every stage here has
# exactly one candidate.
normalizers1 = [
    ('wbysqdist', WbysqDist()),
]
normalizers2 = [
    ('spectral', SpectralNorm()),
]
featurizers = [
    ('degrees', MatrixTransformer(degrees)),
]
selection = [
    ('origS', OrigS()),
]

# Final stage: an sklearn Pipeline chaining the metric transform (step
# name 'metric') into an SVC that takes a precomputed kernel (step name
# 'clf'). These step names are the prefixes used in the parameter grid.
pipe_sdml = Pipeline(
    steps=[
        ('metric', MetricTransform(**param_SDML)),
        ('clf', SVC(kernel='precomputed')),
    ]
)
metric_trans_learn = [
    ('SDML', pipe_sdml),
]

# Ordered (stage name, candidates) pairs handed to the Pipeliner.
steps = [
    ('normalization1', normalizers1),
    ('normalization2', normalizers2),
    ('features', featurizers),
    ('selection', selection),
    ('machine_learning', metric_trans_learn),
]
# Candidate values keyed by the 'SDML' entry of the machine-learning stage.
# Inner keys use the ``<step>__<parameter>`` form, where 'metric' and 'clf'
# are the step names of the SDML pipeline.
param_grid = {
    'SDML': {
        'metric__use_cov': [False],
        'metric__sparsity': [0.01, 0.15, 0.25, 0.5],
        'metric__balance': [0.1, 0.3, 0.6, 0.8],
        'metric__alpha': [0.1, 1e-4, 1e-7, 1, 10, 100],
        'clf__C': [1e-7, 1e-3, 0.1, 1, 10, 100],
    },
}
# Two shuffled, stratified 10-fold splitters with different seeds, passed
# to the Pipeliner as ``grid_cv`` and ``eval_cv`` respectively.
grid_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
eval_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
pipeliner = Pipeliner(steps=steps, grid_cv=grid_cv,
                      eval_cv=eval_cv, param_grid=param_grid)

# The log file lives in a sub-directory; make sure it exists before the run
# starts so a missing directory cannot abort the experiment.
# NOTE(review): whether Pipeliner creates the directory itself is not
# visible here -- creating it up front is harmless either way.
logs_file = 'log/ucla_results_SDML_exp_comp.log'
log_dir = os.path.dirname(logs_file)
if log_dir and not os.path.isdir(log_dir):
    os.makedirs(log_dir)

# Run the experiment, scoring with ROC AUC and caching every stage except
# the final 'machine_learning' one.
table = pipeliner.get_results(
    X, y,
    scoring=['roc_auc'],
    caching_steps=['normalization1', 'normalization2', 'features', 'selection'],
    logs_file=logs_file,
)
print('finished')

# Store the results table as tab-separated text (file name has no extension).
table.to_csv('ucla_results_SDML_exp_comp', sep='\t')