From de1cf2ecf883d62c89826105e1fd7f4f17e1d5b8 Mon Sep 17 00:00:00 2001 From: christinehc Date: Thu, 9 Jan 2025 13:43:38 -0800 Subject: [PATCH] feat,build: update link-ml spec for sample/chem output --- build_script.py | 19 +++++++++++----- srpAnalytics.yaml | 57 +++++++++++++++++++++++++---------------------- 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/build_script.py b/build_script.py index 6aab037..af6493a 100644 --- a/build_script.py +++ b/build_script.py @@ -10,6 +10,9 @@ from tqdm import tqdm from sampleChemMapping.src.mapping import load_mapping_reference, get_mapping_file +# DEFINE OUTPUT DIRECTORY +OUTPUT_DIR = "." # "/tmp" + # TODO: Write this functions def fitCurveFiles(morpho_behavior_tuples): @@ -69,7 +72,7 @@ def runSampMap( cclass: str = "", fses: str = "", descfile: str = "", - output_dir: str = "/tmp/", + output_dir: str = OUTPUT_DIR, ) -> list[str]: """ run sample mapping @@ -118,7 +121,7 @@ def runExposome(chem_id_file): cmd = f"Rscript exposome/exposome_summary_stats.R {chem_id_file}" tqdm.write(cmd) os.system(cmd) - return ["/tmp/exposomeGeneStats.csv"] + return [os.path.join(OUTPUT_DIR, "exposomeGeneStats.csv")] def runExpression(gex, chem, ginfo): @@ -128,7 +131,11 @@ def runExpression(gex, chem, ginfo): cmd = f"Rscript zfExp/parseGexData.R {gex} {chem} {ginfo}" tqdm.write(cmd) os.system(cmd) - return ["/tmp/srpDEGPathways.csv", "/tmp/srpDEGStats.csv", "/tmp/allGeneEx.csv"] + return [ + os.path.join(OUTPUT_DIR, "srpDEGPathways.csv"), + os.path.join(OUTPUT_DIR, "srpDEGStats.csv"), + os.path.join(OUTPUT_DIR, "allGeneEx.csv"), + ] def runSchemaCheck(dbfiles: list[Optional[str]] = []): @@ -235,7 +242,7 @@ def main(): fdf = combineFiles( df.loc[df.sample_type == st].loc[df.data_type == dt], dt ) - fname = f"/tmp/tmp_{st}_{dt}.csv" + fname = os.path.join(OUTPUT_DIR, f"tmp_{st}_{dt}.csv") fdf.to_csv(fname, index=False) if st == "chemical": chem_files.append(fname) @@ -300,10 +307,10 @@ def main(): # Gene Expression Workflow # ------------------------ if args.geneEx: - if not os.path.exists("/tmp/chemicals.csv"): + if not os.path.exists(os.path.join(OUTPUT_DIR, "chemicals.csv")): runSampMap(False, [], sid, cid, emap, cclass, fses, descfile) - res = runExpression(gex1, "/tmp/chemicals.csv", ginfo) + res = runExpression(gex1, os.path.join(OUTPUT_DIR, "chemicals.csv"), ginfo) runSchemaCheck(res) diff --git a/srpAnalytics.yaml b/srpAnalytics.yaml index 1f66b5b..c9fb365 100644 --- a/srpAnalytics.yaml +++ b/srpAnalytics.yaml @@ -180,39 +180,42 @@ classes: zebrafishSampBMDs: description: Benchmark dose measurements of sample extracts in zebrafish slots: - - Sample_ID - - Model - - BMD10 - - BMD50 - - Min_Dose - - Max_Dose - - AUC_Norm - - BMD_Analysis_Flag - - IncludeInPortal - - End_Point_Name - - Description - - endPointLink - - DataQC_Flag + - sample_id + - sample_name + - model + - bmd10 + - bmd50 + - min_dose + - max_dose + - auc_norm + - bmd_analysis_flag + - include_in_portal + - end_point_name + - description + - end_point_link + - data_qc_Flag zebrafishSampDoseResponse: description: Dose response datapoints of sample extracts in zebrafish slots: - - Sample_ID - - Dose - - Response - - CI_Lo - - CI_Hi - - IncludeInPortal - - End_Point_Name - - endPointLink + - sample_id + - dose + - response + - ci_lo + - ci_hi + - combined + - include_in_portal + - end_point_name + - end_point_link zebrafishSampXYCoords: description: XY Coordinates of curve fit data for sample extracts in zebrafish slots: - - Sample_ID - - X_vals - - Y_vals - - IncludeInPortal - - End_Point_Name - - endPointLink + - sample_id + - x_vals + - y_vals + - combined + - include_in_portal + - end_point_name + - end_point_link zebrafishChemBMDs: description: Benchmark dose measurements of chemicals in zebrafish slots: