From a13e0add53977685af16f21c4899221a83ace827 Mon Sep 17 00:00:00 2001 From: danielquintao Date: Wed, 2 Oct 2024 13:05:33 -0300 Subject: [PATCH 1/3] cirro serve with --datadir option working --- cirrocumulus/api.py | 8 ++++++++ cirrocumulus/envir.py | 3 +++ cirrocumulus/serve.py | 10 ++++++++++ src/EditNewDatasetDialog.js | 21 ++++++++++++++++++++- 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/cirrocumulus/api.py b/cirrocumulus/api.py index c1a94062..2fe935bf 100644 --- a/cirrocumulus/api.py +++ b/cirrocumulus/api.py @@ -27,6 +27,7 @@ CIRRO_SPECIES, CIRRO_STATIC_DIR, CIRRO_UPLOAD, + CIRRO_SERVER_DATA_DIR, ) from .invalid_usage import InvalidUsage from .job_api import delete_job, submit_job @@ -167,6 +168,13 @@ def handle_server(): other=["Gallus gallus", "Macaca fascicularis", "Macaca mulatta", "Rattus norvegicus"], ) + # browse server-side data files to allow user to select the desired dataset instead of necessarily typing the URL + d["server_files"] = [] + if CIRRO_SERVER_DATA_DIR in os.environ and os.environ[CIRRO_SERVER_DATA_DIR]: + for root, dirs, files in os.walk(os.environ[CIRRO_SERVER_DATA_DIR]): + for file in dirs + files: + d["server_files"] += [os.path.join(root, file)] + # from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false d["library"] = load_json(CIRRO_LIBRARY) or [ "10x 3' v1", diff --git a/cirrocumulus/envir.py b/cirrocumulus/envir.py index 79b479b3..698a453c 100644 --- a/cirrocumulus/envir.py +++ b/cirrocumulus/envir.py @@ -38,6 +38,9 @@ # comma separated list of paths to allow all logged in users to download from CIRRO_STATIC_DIR = "CIRRO_STATIC_DIR" +# directory in server whose files can be available to user for direct selection +CIRRO_SERVER_DATA_DIR = "CIRRO_SERVER_DATA_DIR" + SERVER_CAPABILITY_RENAME_CATEGORIES = "SERVER_CAPABILITY_RENAME_CATEGORIES" SERVER_CAPABILITY_JOBS = "SERVER_CAPABILITY_JOBS" SERVER_CAPABILITY_FEATURE_SETS = 
"SERVER_CAPABILITY_FEATURE_SETS" diff --git a/cirrocumulus/serve.py b/cirrocumulus/serve.py index f1290ed9..e34f7187 100644 --- a/cirrocumulus/serve.py +++ b/cirrocumulus/serve.py @@ -16,6 +16,7 @@ CIRRO_JOB_TYPE, CIRRO_SERVE, CIRRO_UPLOAD, + CIRRO_SERVER_DATA_DIR, ) from cirrocumulus.launch import create_app from cirrocumulus.util import add_dataset_providers, create_instance, get_fs @@ -100,6 +101,10 @@ def create_parser(description=False): "--results", help="URL to save user computed results (e.g. differential expression) to" ) parser.add_argument("--ontology", help="Path to ontology in OBO format for annotation") + parser.add_argument( + "--datadir", + help='Path to directory in the server where the user can select its dataset from with no need to type the full URL in the "New Dataset" window.', + ) return parser @@ -127,6 +132,11 @@ def main(argsv): os.environ[CIRRO_JOB_RESULTS] = args.results get_fs(os.environ[CIRRO_JOB_RESULTS]).makedirs(os.environ[CIRRO_JOB_RESULTS], exist_ok=True) + if args.datadir is not None: + if not os.path.isdir(args.datadir): + raise ValueError("--datadir is not a valid path to a directory") + os.environ[CIRRO_SERVER_DATA_DIR] = args.datadir + run_args = [ "gunicorn", "-b", diff --git a/src/EditNewDatasetDialog.js b/src/EditNewDatasetDialog.js index e9947cb5..90185537 100644 --- a/src/EditNewDatasetDialog.js +++ b/src/EditNewDatasetDialog.js @@ -81,8 +81,11 @@ function EditNewDatasetDialog(props) { const otherSpecies = serverInfo.species.other; const libraryOptions = serverInfo.library; + const dataFiles = serverInfo.server_files; + const canUpload = serverInfo.upload; const isNew = dataset == null; + const mustBrowse = dataFiles.length > 0; let saveEnabled = !loading && name.trim() !== ''; const isAuthEnabled = serverInfo.auth.clientId !== ''; @@ -340,7 +343,7 @@ function EditNewDatasetDialog(props) { + Date: Thu, 3 Oct 2024 10:18:26 -0300 Subject: [PATCH 2/3] filter browseable files/dirs to h5ad, 10x h5, Xenium, loom, Seurat, 
TileDB, or zarr formats --- cirrocumulus/api.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/cirrocumulus/api.py b/cirrocumulus/api.py index 2fe935bf..17e0f9b3 100644 --- a/cirrocumulus/api.py +++ b/cirrocumulus/api.py @@ -168,12 +168,42 @@ def handle_server(): other=["Gallus gallus", "Macaca fascicularis", "Macaca mulatta", "Rattus norvegicus"], ) - # browse server-side data files to allow user to select the desired dataset instead of necessarily typing the URL + # browse server-side data files to allow user to select the desired dataset instead of necessarily typing the URL: d["server_files"] = [] if CIRRO_SERVER_DATA_DIR in os.environ and os.environ[CIRRO_SERVER_DATA_DIR]: + VALID_EXTENSIONS = [ + ".h5ad", + ".h5", + ".zip", + ".tar", + ".tar.gz", + ".loom", + ".h5seurat", + ".rds", + ".zarr", + ] for root, dirs, files in os.walk(os.environ[CIRRO_SERVER_DATA_DIR]): for file in dirs + files: - d["server_files"] += [os.path.join(root, file)] + if any([file.endswith(ext) for ext in VALID_EXTENSIONS]): + d["server_files"] += [os.path.join(root, file)] + # if this is a directory, e.g. 
.zarr "files", prevent further recursion: + if file in dirs: + dirs.remove(file) + elif ( + file in dirs + ): # check if `file` is a MEX formatted directory (we must look at the subfiles) + # https://www.10xgenomics.com/support/software/xenium-panel-designer/latest/tutorials/create-single-cell-reference + count_tsvgz = 0 # MEX directories have two subfiles with .tsv.gz extension + count_mtxgz = 0 # MEX directories also have one subfile with .mtx.gz extension + for subfile in os.listdir(os.path.join(root, file)): + if subfile.endswith(".tsv.gz"): + count_tsvgz += 1 + elif subfile.endswith(".mtx.gz"): + count_mtxgz += 1 + if count_tsvgz == 2 and count_mtxgz == 1: + d["server_files"] += [os.path.join(root, file)] + dirs.remove(file) + break # from https://www.ebi.ac.uk/ols/ontologies/efo/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0010183&viewMode=All&siblings=false d["library"] = load_json(CIRRO_LIBRARY) or [ From 49600832b65a1bbff640c8da70c2e0a866b306d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Oct 2024 13:59:10 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cirrocumulus/api.py | 2 +- cirrocumulus/serve.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cirrocumulus/api.py b/cirrocumulus/api.py index 17e0f9b3..43da9d7e 100644 --- a/cirrocumulus/api.py +++ b/cirrocumulus/api.py @@ -24,10 +24,10 @@ CIRRO_LIBRARY, CIRRO_MIXPANEL, CIRRO_SERVE, + CIRRO_SERVER_DATA_DIR, CIRRO_SPECIES, CIRRO_STATIC_DIR, CIRRO_UPLOAD, - CIRRO_SERVER_DATA_DIR, ) from .invalid_usage import InvalidUsage from .job_api import delete_job, submit_job diff --git a/cirrocumulus/serve.py b/cirrocumulus/serve.py index e34f7187..4f449e14 100644 --- a/cirrocumulus/serve.py +++ b/cirrocumulus/serve.py @@ -15,8 +15,8 @@ CIRRO_JOB_RESULTS, CIRRO_JOB_TYPE, CIRRO_SERVE, - CIRRO_UPLOAD, CIRRO_SERVER_DATA_DIR, + CIRRO_UPLOAD, 
) from cirrocumulus.launch import create_app from cirrocumulus.util import add_dataset_providers, create_instance, get_fs