Skip to content

Commit

Permalink
feat,build: update link-ml spec for sample/chem output
Browse files Browse the repository at this point in the history
  • Loading branch information
christinehc committed Jan 9, 2025
1 parent 746aaf7 commit de1cf2e
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 33 deletions.
19 changes: 13 additions & 6 deletions build_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from tqdm import tqdm
from sampleChemMapping.src.mapping import load_mapping_reference, get_mapping_file

# Directory used to build the paths of generated output files
# (replaces the previously hard-coded "/tmp" locations).
# NOTE(review): the Rscript commands invoked by this script are not passed
# this directory -- confirm the R scripts write their CSVs to the same place.
OUTPUT_DIR = "." # "/tmp"


# TODO: Write this function
def fitCurveFiles(morpho_behavior_tuples):
Expand Down Expand Up @@ -69,7 +72,7 @@ def runSampMap(
cclass: str = "",
fses: str = "",
descfile: str = "",
output_dir: str = "/tmp/",
output_dir: str = OUTPUT_DIR,
) -> list[str]:
"""
run sample mapping
Expand Down Expand Up @@ -118,7 +121,7 @@ def runExposome(chem_id_file):
cmd = f"Rscript exposome/exposome_summary_stats.R {chem_id_file}"
tqdm.write(cmd)
os.system(cmd)
return ["/tmp/exposomeGeneStats.csv"]
return [os.path.join(OUTPUT_DIR, "exposomeGeneStats.csv")]


def runExpression(gex, chem, ginfo):
Expand All @@ -128,7 +131,11 @@ def runExpression(gex, chem, ginfo):
cmd = f"Rscript zfExp/parseGexData.R {gex} {chem} {ginfo}"
tqdm.write(cmd)
os.system(cmd)
return ["/tmp/srpDEGPathways.csv", "/tmp/srpDEGStats.csv", "/tmp/allGeneEx.csv"]
return [
os.path.join(OUTPUT_DIR, "srpDEGPathways.csv"),
os.path.join(OUTPUT_DIR, "srpDEGStats.csv"),
os.path.join(OUTPUT_DIR, "allGeneEx.csv"),
]


def runSchemaCheck(dbfiles: list[Optional[str]] = []):
Expand Down Expand Up @@ -235,7 +242,7 @@ def main():
fdf = combineFiles(
df.loc[df.sample_type == st].loc[df.data_type == dt], dt
)
fname = f"/tmp/tmp_{st}_{dt}.csv"
fname = os.path.join(OUTPUT_DIR, f"tmp_{st}_{dt}.csv")
fdf.to_csv(fname, index=False)
if st == "chemical":
chem_files.append(fname)
Expand Down Expand Up @@ -300,10 +307,10 @@ def main():
# Gene Expression Workflow
# ------------------------
if args.geneEx:
if not os.path.exists("/tmp/chemicals.csv"):
if not os.path.exists(os.path.join(OUTPUT_DIR, "chemicals.csv")):
runSampMap(False, [], sid, cid, emap, cclass, fses, descfile)

res = runExpression(gex1, "/tmp/chemicals.csv", ginfo)
res = runExpression(gex1, os.path.join(OUTPUT_DIR, "chemicals.csv"), ginfo)
runSchemaCheck(res)


Expand Down
57 changes: 30 additions & 27 deletions srpAnalytics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -180,39 +180,42 @@ classes:
zebrafishSampBMDs:
description: Benchmark dose measurements of sample extracts in zebrafish
slots:
- Sample_ID
- Model
- BMD10
- BMD50
- Min_Dose
- Max_Dose
- AUC_Norm
- BMD_Analysis_Flag
- IncludeInPortal
- End_Point_Name
- Description
- endPointLink
- DataQC_Flag
- sample_id
- sample_name
- model
- bmd10
- bmd50
- min_dose
- max_dose
- auc_norm
- bmd_analysis_flag
- include_in_portal
- end_point_name
- description
- end_point_link
- data_qc_Flag
zebrafishSampDoseResponse:
description: Dose response datapoints of sample extracts in zebrafish
slots:
- Sample_ID
- Dose
- Response
- CI_Lo
- CI_Hi
- IncludeInPortal
- End_Point_Name
- endPointLink
- sample_id
- dose
- response
- ci_lo
- ci_hi
- combined
- include_in_portal
- end_point_name
- end_point_link
zebrafishSampXYCoords:
description: XY Coordinates of curve fit data for sample extracts in zebrafish
slots:
- Sample_ID
- X_vals
- Y_vals
- IncludeInPortal
- End_Point_Name
- endPointLink
- sample_id
- x_vals
- y_vals
- combined
- include_in_portal
- end_point_name
- end_point_link
zebrafishChemBMDs:
description: Benchmark dose measurements of chemicals in zebrafish
slots:
Expand Down

0 comments on commit de1cf2e

Please sign in to comment.