-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
123 lines (103 loc) · 3.48 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import argparse
import os
from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.libraries import LibraryClient
from bioblend.galaxy.histories import HistoryClient
from bioblend.galaxy.folders import FoldersClient
from datetime import datetime
from time import sleep
# ---------------------------------------------------------------------------
# Command-line arguments
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument("-a", "--apikey", help="Galaxy API key")
parser.add_argument("-e", "--endpoint", help="Galaxy host (default: 127.0.0.1)")
parser.add_argument("-p", "--port", help="Galaxy port (default: 8080)")
parser.add_argument(
    "-s", "--sourcedir",
    help="directory whose files are added to the data library",
)
args = parser.parse_args()

# ---------------------------------------------------------------------------
# Galaxy connection
# ---------------------------------------------------------------------------
host = args.endpoint or "127.0.0.1"
# Honour --port: it used to be parsed and then ignored in favour of a
# hard-coded "8080".
port = args.port or "8080"
# Built with format() so operator precedence cannot collapse the whole
# address to "" (the old `a + b if c else ""` form could).
addr = "{}:{}".format(host, port)
apik = args.apikey

gi = GalaxyInstance(addr, apik)
lc = LibraryClient(gi)
fc = FoldersClient(gi)
hc = HistoryClient(gi)

# ---------------------------------------------------------------------------
# Find the target data library, creating it when it does not exist yet
# ---------------------------------------------------------------------------
library_name = "GDC Files"
library_description = "A library of files acquired from the NCI Genomic Data Commons (GDC)"

libs = lc.get_libraries()
if isinstance(libs, dict):
    # Normalise a single-library response to a list.
    libs = [libs]

lib = {}
for _lib in libs or []:
    # No `break`: if several libraries share the name, the last one listed
    # wins, exactly as before.
    if _lib.get("name") == library_name:
        lib = _lib

if not lib:
    # Create the library whenever no library with this name was found, not
    # only when the server has no libraries at all.
    lib = lc.create_library(library_name, library_description)
    print("Library {} created:\n{}".format(library_name, lib))

if not lib:
    print("ERROR: no library")
    # Non-zero status so callers can detect the failure.
    raise SystemExit(1)

if "root_folder_id" not in lib:
    # NOTE(review): some create_library responses may only carry
    # id/name/url; fetch the full record so the folder creation below has
    # a root folder to attach to -- confirm against the Galaxy version used.
    lib = lc.show_library(lib["id"])

print("lib:{}".format(lib))

# ---------------------------------------------------------------------------
# Create a timestamp-named folder in the library root to hold this run
# ---------------------------------------------------------------------------
now_string = datetime.today().strftime("%Y-%m-%d @ %H:%M:%S")
folder = fc.create_folder(parent_folder_id=lib["root_folder_id"], name=now_string)
print(folder)
# Brief pause before uploading -- presumably gives Galaxy time to register
# the new folder; TODO confirm it is still needed.
sleep(1)
# NOTE: NOT RECURSIVE -- only files in base dir
def add_files_in_path_to_lib(lib_id, folder_id, path):
    """Add every regular file directly inside *path* to a library folder.

    Each file is submitted on its own through the module-level
    ``LibraryClient`` (``lc``) using ``upload_from_galaxy_filesystem`` with
    ``link_data_only="link_to_files"`` and ``tag_using_filenames=True``.

    Args:
        lib_id: ID of the target data library.
        folder_id: ID of the library folder that receives the datasets.
        path: Directory to scan. It is listed with ``os.listdir`` on the
            machine running this script and the resulting paths are handed
            to Galaxy unchanged, so both must see the same filesystem.

    Returns:
        A flat list of the records returned by the upload calls, in
        file-name order. Empty when *path* contains no regular files.

    Raises:
        OSError: if *path* cannot be listed.
    """
    results = []
    # Sorted so the resulting order (and thus the collection built from it)
    # does not depend on the arbitrary order of os.listdir().
    for entry in sorted(os.listdir(path)):
        full_path = os.path.join(path, entry)
        if not os.path.isfile(full_path):
            # Sub-directories and other non-files are skipped: this helper
            # is intentionally not recursive.
            continue
        uploaded = lc.upload_from_galaxy_filesystem(
            lib_id,
            full_path,
            folder_id=folder_id,
            link_data_only="link_to_files",
            tag_using_filenames=True,
        )
        if isinstance(uploaded, dict):
            # extend() on a dict would add its keys, not the record itself.
            results.append(uploaded)
        else:
            results.extend(uploaded or [])
    return results
# ---------------------------------------------------------------------------
# Add the source directory's files to the library folder
# ---------------------------------------------------------------------------
files = add_files_in_path_to_lib(lib["id"], folder["id"], args.sourcedir)
if isinstance(files, dict):
    # Normalise a single-record result to a list.
    files = [files]
print(files)
print("Data check on {} files:".format(len(files)))

# ---------------------------------------------------------------------------
# Poll the folder until every uploaded dataset reports state "ok"
# ---------------------------------------------------------------------------
print("waiting on datasets to become available...")
# NOTE(review): states treated as unrecoverable; taken from Galaxy's dataset
# state names -- confirm against the Galaxy version in use.
failed_states = ("error", "failed_metadata", "discarded")
ready = 0
old_ready = -1
while ready < len(files):
    if ready != old_ready:
        print("ready files: {}".format(ready))
        old_ready = ready
    contents = fc.show_folder(folder["id"], contents=True)["folder_contents"]
    # .get(): entries without a "state" key (e.g. sub-folders) must not
    # raise KeyError.
    failed = [item for item in contents if item.get("state") in failed_states]
    if failed:
        # Without this check a failed dataset would keep the loop waiting
        # forever, because it can never reach "ok".
        print("ERROR: {} dataset(s) failed: {}".format(len(failed), failed))
        raise SystemExit(1)
    ready = sum(1 for item in contents if item.get("state") == "ok")
    if ready < len(files):
        # Only wait when there is still something to wait for.
        sleep(5)
        print("...")
print("All {} datasets ready!".format(ready))

# ---------------------------------------------------------------------------
# Create a history named after this run and collect the datasets into it
# ---------------------------------------------------------------------------
history = hc.create_history("{}".format(now_string))
print(history)

# One list-collection element per uploaded library dataset ('ldda' source).
collection_description = {
    'collection_type': 'list',
    'element_identifiers': [
        {'id': f["id"], 'name': f["name"], 'src': 'ldda'}
        for f in files
    ],
    'name': 'manifest collection',
}
print(collection_description)
hc.create_dataset_collection(history["id"], collection_description)