# map_rawout_to_organized_s3.py
#
# Author: Daniel Clark, 2015
'''
This module organizes outputs from the C-PAC pipeline into the ABIDE
pre-processed file structure
'''
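
# Note (illustrative, not from the original code): main() below builds a
# mapping from raw C-PAC output keys of roughly the form
#     <bucket_prefix>/<pipeline_folder>/<sub_id>_session_1/<output_folder>/...
# to organized destination keys of the form
#     <bucket_prefix>/<strategy>/<derivative>/<subject>_<derivative>.<nii.gz|1D>
# e.g. .../nofilt_global/reho/<subject>_reho.nii.gz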
# Main function
def main():
    '''
    This function runs the main routine
    '''

    # Import packages
    from CPAC.AWS import fetch_creds
    import os
    import yaml

    # Init variables
    creds_path = '/home/ubuntu/secure-creds/aws-keys/fcp-indi-keys2.csv'
    bucket = fetch_creds.return_bucket('fcp-indi', creds_path)
    bucket_prefix = 'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_rerun'
    sub_fp = '/home/ubuntu/abide/preprocessing/yamls/subs_list.yml'
    sub_list = yaml.load(open(sub_fp, 'r'))
    example_subid = '0050002_session_1'

    # Populate list of files to link to
    #src_list = []
    #src_list = gather_files_tosort(src_list, bucket, bucket_prefix)
    # Strategy dictionary {strategy_name : [pipeline_folder, global_signal_flag]}
    strat_dict = {'nofilt_noglobal' : ['pipeline_abide_rerun', 'global0'],
                  'nofilt_global' : ['pipeline_abide_rerun', 'global1'],
                  'filt_noglobal' : ['pipeline_abide_rerun__freq-filter', 'global0'],
                  'filt_global' : ['pipeline_abide_rerun__freq-filter', 'global1']}

    # Derivatives dictionary {name : (num_files_per_strategy, output_folder, name_filter)}
    derivs_dict = {'alff' : (1, 'alff_to_standard_smooth', 'nii.gz'),
                   'degree_binarize' : (1, 'centrality_outputs_smoothed', 'degree_centrality_binarize'),
                   'degree_weighted' : (1, 'centrality_outputs_smoothed', 'degree_centrality_weighted'),
                   'dual_regression' : (1, 'dr_tempreg_maps_zstat_stack_to_standard_smooth', 'nii.gz'),
                   'eigenvector_binarize' : (1, 'centrality_outputs_smoothed', 'eigenvector_centrality_binarize'),
                   'eigenvector_weighted' : (1, 'centrality_outputs_smoothed', 'eigenvector_centrality_weighted'),
                   'falff' : (1, 'falff_to_standard_smooth', 'nii.gz'),
                   'func_mask' : (1, 'functional_brain_mask_to_standard', 'nii.gz'),
                   'func_mean' : (1, 'mean_functional_in_mni', 'nii.gz'),
                   'func_preproc' : (1, 'functional_mni', '.nii.gz'),
                   'lfcd' : (1, 'centrality_outputs_smoothed', 'lfcd_binarize'),
                   'reho' : (1, 'reho_to_standard_smooth', 'nii.gz'),
                   'rois_aal' : (4, 'roi_timeseries', 'aal'),
                   'rois_cc200' : (4, 'roi_timeseries', 'CC200'),
                   'rois_cc400' : (4, 'roi_timeseries', 'CC400'),
                   'rois_dosenbach160' : (4, 'roi_timeseries', 'rois_3mm'),
                   'rois_ez' : (4, 'roi_timeseries', 'ez'),
                   'rois_ho' : (4, 'roi_timeseries', 'ho_'),
                   'rois_tt' : (4, 'roi_timeseries', 'tt'),
                   'vmhc' : (1, 'vmhc_fisher_zstd_zstat_map', 'nii.gz')}
    # Create error and output dictionaries
    out_dict = {k : {kk : [] for kk in derivs_dict.keys()} for k in strat_dict.keys()}
    err_dict = {k : {kk : [] for kk in derivs_dict.keys()} for k in strat_dict.keys()}

    # Iterate through strategies
    for strat, filts in strat_dict.items():
        print 'building %s...' % strat
        filt = filts[0]
        g_sig = filts[1]
        strat_prefix = os.path.join(bucket_prefix, filt, example_subid)
        # Iterate through derivatives
        for deriv, v in derivs_dict.items():
            num_files = v[0]
            deriv_folder = v[1]
            name_filter = v[2]
            deriv_prefix = os.path.join(strat_prefix, deriv_folder)
            keys_list = []
            for key in bucket.list(prefix=deriv_prefix):
                k_name = str(key.name)
                # If global signal regression was used or didn't need to be
                if (g_sig in k_name or 'global' not in k_name) and \
                   name_filter in k_name:
                    keys_list.append(k_name)
            # Grab only the wanted result from the matched keys
            if len(keys_list) == num_files:
                out_dict[strat][deriv] = [k for k in keys_list if '.nii.gz' in k or '.1D' in k][0]
            else:
                err_dict[strat][deriv] = keys_list
                print 'error in number of files!'
    # Go through dictionary and build paths
    mapping_dict = {}
    s = 1
    # For each subject
    for sub in sub_list:
        subid = sub.split('_')[-1] + '_session_1'
        print 'populating %s...%d' % (subid, s)
        # For each strategy
        for strat, deriv_dict in out_dict.items():
            strat_prefix = os.path.join(bucket_prefix, strat)
            # For each derivative, generate src and dst filepaths
            d = 0
            for deriv, filepath in deriv_dict.items():
                deriv_prefix = os.path.join(strat_prefix, deriv, sub + '_' + deriv)
                # Check extensions
                if filepath.endswith('.nii.gz'):
                    dst_path = deriv_prefix + '.nii.gz'
                elif filepath.endswith('.1D'):
                    dst_path = deriv_prefix + '.1D'
                else:
                    raise Exception('Bad extension type')
                # Substitute this subject's id into the example filepath
                src_path = filepath.replace(example_subid, subid)
                mapping_dict[src_path] = dst_path
                d += 1
            if d != 20:
                print d
                raw_input('not enough derivs')
        s += 1

    # Return
    return out_dict, err_dict, mapping_dict
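
# Illustrative sketch (not part of the original module): one way the mapping
# returned by main() could be applied is to copy each raw key to its organized
# destination within the same bucket. copy_mapped_outputs is a hypothetical
# helper and assumes the object returned by fetch_creds.return_bucket is a
# boto 2 Bucket, whose copy_key(dst_key_name, src_bucket_name, src_key_name)
# method performs a server-side copy.
def copy_mapped_outputs(bucket, mapping_dict):
    '''
    Copy every source key in mapping_dict to its organized destination key
    within the same S3 bucket (sketch only)
    '''

    # Iterate over src -> dst pairs and copy each key in place
    num_keys = len(mapping_dict)
    for idx, (src_path, dst_path) in enumerate(mapping_dict.items()):
        bucket.copy_key(dst_path, bucket.name, src_path)
        print 'copied %d/%d: %s' % (idx + 1, num_keys, dst_path)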

# Run main by default
if __name__ == '__main__':
    main()