Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/fix hpc #1

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions assets/annotate_hpo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import argparse
import sys
from csv import QUOTE_NONNUMERIC

from pandas import read_csv
from pyhpo import Ontology
from pyhpo.set import HPOSet

# Default locations are the paths the script originally had hard-coded, so
# running it without arguments still reads and writes the same files.
DEFAULT_INPUT = "/Users/ejong19/repos/vip-2/assets/test_data/list_pats_V2_13-03-2023.csv"
DEFAULT_OUTPUT = "/Users/ejong19/repos/vip-2/assets/test_data/annotated_list_patients_06062024_full.csv"


def annotate_diagnoses(cell, unknown):
    """Return the HPO annotation string for one ``Diagnosis`` cell.

    Args:
        cell: Raw cell value. A string holds diagnoses separated by "; "
            (a trailing ";" is ignored). Any non-string value, such as the
            NaN pandas yields for an empty cell, is treated as "no diagnoses".
        unknown: Mapping of diagnosis text to the number of times no HPO term
            was found for it; updated in place.

    Returns:
        The string forms of the looked-up HPO terms joined by "; ". A
        diagnosis without a matching term contributes
        "Unknown HPO for '<diagnosis>'" instead.
    """
    if not isinstance(cell, str):
        # Missing value: the original crashed here with AttributeError.
        return ""

    labels = []
    for diagnosis in cell.rstrip(";").split("; "):
        try:
            hpo_term = Ontology.get_hpo_object(diagnosis.strip(" "))
        except RuntimeError:
            # pyhpo signals an unknown term with RuntimeError; keep the row
            # and record the miss rather than aborting the whole run.
            hpo_term = f"Unknown HPO for '{diagnosis}'"
            unknown[diagnosis] = unknown.get(diagnosis, 0) + 1
        labels.append(str(hpo_term))
    return "; ".join(labels)


def main(argv=None):
    """Annotate a patient CSV with HPO terms and write the result.

    Args:
        argv: Optional argument list (defaults to ``sys.argv[1:]``). Accepts
            two optional positionals: the input CSV and the output CSV.
    """
    parser = argparse.ArgumentParser(
        description="Add an 'hpo' column with HPO terms for each diagnosis."
    )
    parser.add_argument(
        "input", nargs="?", default=DEFAULT_INPUT,
        help="';'-separated input CSV with a 'Diagnosis' column",
    )
    parser.add_argument(
        "output", nargs="?", default=DEFAULT_OUTPUT,
        help="path of the annotated CSV to write",
    )
    args = parser.parse_args(argv)

    df = read_csv(args.input, delimiter=";", quotechar='"')

    # Instantiating the ontology loads it; the lookups below go through the
    # Ontology class itself, so the instance is not needed afterwards.
    _ = Ontology()

    unknown = {}
    df["hpo"] = [annotate_diagnoses(cell, unknown) for cell in df["Diagnosis"]]

    df.to_csv(args.output, index=False, quoting=QUOTE_NONNUMERIC, sep=";")

    # Report the diagnoses that could not be mapped (most frequent first);
    # previously this tally was collected but never shown.
    for diagnosis, count in sorted(unknown.items(), key=lambda item: -item[1]):
        print(f"No HPO term found for '{diagnosis}' ({count}x)", file=sys.stderr)


if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions assets/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pandas==2.2.2
pyhpo==3.3.0
13 changes: 10 additions & 3 deletions config/nxf.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env {

// Settings in the `process` scope: a memory value and a time limit.
process {
memory = '4GB'
// NOTE(review): this text comes from a rendered diff without +/- markers; the
// fixed '4h' line is presumably the removed line and the closure below its
// replacement — confirm against the real file.
time = '4h'
// Time limit is 4 hours multiplied by task.attempt.
time = {4.h * task.attempt}
}

profiles {
Expand All @@ -30,9 +30,16 @@ profiles {
// optimize the number of cpus so that all chunks can be processed in parallel
process.cpus = 6
process.executor = 'slurm'
process.errorStrategy = { task.exitStatus in [9, 143, 137, 104, 134, 139, 247] ? 'retry' : 'finish' }
process.errorStrategy = 'retry'
process.maxErrors = '-1'
process.maxRetries = 3
process.maxRetries = 1
process.queue = 'cpu'
// Scheduler-interaction settings for the executor; option meanings below
// follow the Nextflow `executor` configuration scope documentation.
executor {
// upper bound on the number of jobs the executor keeps queued at once
queueSize = 1000
// how often to check whether submitted jobs have finished
pollInterval = '1min'
// how often to fetch the queue status from the scheduler
queueStatInterval = '5min'
// Maximum job-submission rate ('10sec' = 10 submissions per second).
// The option is spelled `submitRateLimit`; the previous `submitRatelimit`
// does not match the documented name, so the limit was not being applied.
submitRateLimit = '10sec'
}
}
}

Expand Down
8 changes: 4 additions & 4 deletions config/nxf_vcf.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@ env {
process {
// Resource settings for processes labelled 'vcf_validate'.
withLabel: 'vcf_validate' {
memory = '100MB'
// NOTE(review): rendered-diff residue — the fixed '30m' line is presumably
// the removed line and the closure below its replacement; confirm.
time = '30m'
// Time limit is 30 minutes multiplied by task.attempt.
time = {30.m * task.attempt}
}

// Resource settings for processes labelled 'vcf_split'.
withLabel: 'vcf_split' {
memory = '100MB'
// NOTE(review): rendered-diff residue — the fixed '30m' line is presumably
// the removed line and the closure below its replacement; confirm.
time = '30m'
// Time limit is 30 minutes multiplied by task.attempt.
time = {30.m * task.attempt}
}

// Resource settings for processes labelled 'vcf_annotate'.
withLabel:'vcf_annotate' {
cpus = 4
// NOTE(review): rendered-diff residue — the fixed '8GB' and '4h' lines are
// presumably the removed lines, replaced by the two closures that follow;
// confirm against the real file.
memory = '8GB'
time = '4h'
// Memory is 16 GB multiplied by task.attempt.
memory = {16.GB * task.attempt}
// Time limit is 4 hours multiplied by task.attempt.
time = {4.h * task.attempt}
}

withLabel:'vcf_classify|vcf_classify_samples|vcf_inheritance' {
Expand Down
Loading