From de1cf2ecf883d62c89826105e1fd7f4f17e1d5b8 Mon Sep 17 00:00:00 2001
From: christinehc <christine.chang@pnnl.gov>
Date: Thu, 9 Jan 2025 13:43:38 -0800
Subject: [PATCH] feat,build: update LinkML spec for sample/chem output

---
 build_script.py   | 19 +++++++++++-----
 srpAnalytics.yaml | 57 +++++++++++++++++++++++++----------------------
 2 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/build_script.py b/build_script.py
index 6aab037..af6493a 100644
--- a/build_script.py
+++ b/build_script.py
@@ -10,6 +10,9 @@
 from tqdm import tqdm
 from sampleChemMapping.src.mapping import load_mapping_reference, get_mapping_file
 
+# Base directory used to build output file paths ("/tmp" was the previous location).
+OUTPUT_DIR = "."  # "/tmp"
+
 
 # TODO: Write this functions
 def fitCurveFiles(morpho_behavior_tuples):
@@ -69,7 +72,7 @@ def runSampMap(
     cclass: str = "",
     fses: str = "",
     descfile: str = "",
-    output_dir: str = "/tmp/",
+    output_dir: str = OUTPUT_DIR,
 ) -> list[str]:
     """
     run sample mapping
@@ -118,7 +121,7 @@ def runExposome(chem_id_file):
     cmd = f"Rscript exposome/exposome_summary_stats.R {chem_id_file}"
     tqdm.write(cmd)
     os.system(cmd)
-    return ["/tmp/exposomeGeneStats.csv"]
+    return [os.path.join(OUTPUT_DIR, "exposomeGeneStats.csv")]
 
 
 def runExpression(gex, chem, ginfo):
@@ -128,7 +131,11 @@ def runExpression(gex, chem, ginfo):
     cmd = f"Rscript zfExp/parseGexData.R {gex} {chem} {ginfo}"
     tqdm.write(cmd)
     os.system(cmd)
-    return ["/tmp/srpDEGPathways.csv", "/tmp/srpDEGStats.csv", "/tmp/allGeneEx.csv"]
+    return [
+        os.path.join(OUTPUT_DIR, "srpDEGPathways.csv"),
+        os.path.join(OUTPUT_DIR, "srpDEGStats.csv"),
+        os.path.join(OUTPUT_DIR, "allGeneEx.csv"),
+    ]
 
 
 def runSchemaCheck(dbfiles: list[Optional[str]] = []):
@@ -235,7 +242,7 @@ def main():
                 fdf = combineFiles(
                     df.loc[df.sample_type == st].loc[df.data_type == dt], dt
                 )
-                fname = f"/tmp/tmp_{st}_{dt}.csv"
+                fname = os.path.join(OUTPUT_DIR, f"tmp_{st}_{dt}.csv")
                 fdf.to_csv(fname, index=False)
                 if st == "chemical":
                     chem_files.append(fname)
@@ -300,10 +307,10 @@ def main():
     # Gene Expression Workflow
     # ------------------------
     if args.geneEx:
-        if not os.path.exists("/tmp/chemicals.csv"):
+        if not os.path.exists(os.path.join(OUTPUT_DIR, "chemicals.csv")):
             runSampMap(False, [], sid, cid, emap, cclass, fses, descfile)
 
-        res = runExpression(gex1, "/tmp/chemicals.csv", ginfo)
+        res = runExpression(gex1, os.path.join(OUTPUT_DIR, "chemicals.csv"), ginfo)
         runSchemaCheck(res)
 
 
diff --git a/srpAnalytics.yaml b/srpAnalytics.yaml
index 1f66b5b..c9fb365 100644
--- a/srpAnalytics.yaml
+++ b/srpAnalytics.yaml
@@ -180,39 +180,42 @@ classes:
   zebrafishSampBMDs:
     description: Benchmark dose measurements of sample extracts in zebrafish
     slots:
-      - Sample_ID
-      - Model
-      - BMD10
-      - BMD50
-      - Min_Dose
-      - Max_Dose
-      - AUC_Norm
-      - BMD_Analysis_Flag
-      - IncludeInPortal
-      - End_Point_Name
-      - Description
-      - endPointLink
-      - DataQC_Flag
+      - sample_id
+      - sample_name
+      - model
+      - bmd10
+      - bmd50
+      - min_dose
+      - max_dose
+      - auc_norm
+      - bmd_analysis_flag
+      - include_in_portal
+      - end_point_name
+      - description
+      - end_point_link
+      - data_qc_Flag  # NOTE(review): mixed case unlike sibling snake_case slots; confirm against the slot definition
   zebrafishSampDoseResponse:
     description: Dose response datapoints of sample extracts in zebrafish
     slots:
-      - Sample_ID
-      - Dose
-      - Response
-      - CI_Lo
-      - CI_Hi
-      - IncludeInPortal
-      - End_Point_Name
-      - endPointLink
+      - sample_id
+      - dose
+      - response
+      - ci_lo
+      - ci_hi
+      - combined
+      - include_in_portal
+      - end_point_name
+      - end_point_link
   zebrafishSampXYCoords:
     description: XY Coordinates of curve fit data for sample extracts in zebrafish
     slots:
-      - Sample_ID
-      - X_vals
-      - Y_vals
-      - IncludeInPortal
-      - End_Point_Name
-      - endPointLink
+      - sample_id
+      - x_vals
+      - y_vals
+      - combined
+      - include_in_portal
+      - end_point_name
+      - end_point_link
   zebrafishChemBMDs:
     description: Benchmark dose measurements of chemicals in zebrafish
     slots: