UMCUGenetics · ellendejong · Jan 22, 2024 · Jan 22, 2024 · Jan 22, 2024 · Jan 22, 2024
diff --git a/assets/annotate_hpo.py b/assets/annotate_hpo.py
@@ -0,0 +1,56 @@
+"""Annotate a ';'-delimited patient list with the HPO term of each diagnosis.
+
+Usage: annotate_hpo.py [input_csv] [output_csv]
+"""
+import argparse
+import sys
+from collections import Counter
+from csv import QUOTE_NONNUMERIC
+
+from pandas import isna, read_csv
+from pyhpo import Ontology
+from pyhpo.set import HPOSet
+
+# The formerly hard-coded paths stay as defaults so a bare invocation behaves as before.
+DEFAULT_INPUT = "/Users/ejong19/repos/vip-2/assets/test_data/list_pats_V2_13-03-2023.csv"
+DEFAULT_OUTPUT = "/Users/ejong19/repos/vip-2/assets/test_data/annotated_list_patients_06062024_full.csv"
+
+
+def annotate_diagnoses(diagnosis_field, unknown):
+    """Return the "; "-joined HPO terms for one "Diagnosis" cell.
+
+    Names that pyhpo cannot resolve are rendered as "Unknown HPO for '<name>'"
+    and tallied in the ``unknown`` counter. A missing cell (NaN) yields "".
+    """
+    if isna(diagnosis_field):
+        return ""
+    hpo_terms = []
+    for diagnosis in str(diagnosis_field).rstrip(";").split("; "):
+        try:
+            hpo_terms.append(str(Ontology.get_hpo_object(diagnosis.strip(" "))))
+        except (RuntimeError, ValueError):  # no matching term / malformed "HP:" id
+            hpo_terms.append(f"Unknown HPO for '{diagnosis}'")
+            unknown[diagnosis] += 1
+    return "; ".join(hpo_terms)
+
+
+def main(argv=None):
+    """Read the patient CSV, add an "hpo" column and write the annotated CSV."""
+    parser = argparse.ArgumentParser(description="Add an 'hpo' column to a patient CSV.")
+    parser.add_argument("input_csv", nargs="?", default=DEFAULT_INPUT, help="';'-delimited input CSV")
+    parser.add_argument("output_csv", nargs="?", default=DEFAULT_OUTPUT, help="path of the annotated CSV")
+    args = parser.parse_args(argv)
+
+    df = read_csv(args.input_csv, delimiter=";", quotechar='"')
+    _ = Ontology()  # pyhpo requires the ontology to be instantiated once before lookups
+    unknown = Counter()
+    df["hpo"] = [annotate_diagnoses(cell, unknown) for cell in df["Diagnosis"]]
+    df.to_csv(args.output_csv, index=False, quoting=QUOTE_NONNUMERIC, sep=";")
+
+    # Report the unresolved diagnoses (they used to be counted but never shown).
+    for diagnosis, count in unknown.most_common():
+        print(f"{count}\t{diagnosis}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/assets/requirements.txt b/assets/requirements.txt
@@ -0,0 +1,2 @@
+pandas==2.2.2
+pyhpo==3.3.0
diff --git a/config/nxf.config b/config/nxf.config
@@ -16,7 +16,7 @@ env {
 
 process {
   memory = '4GB'
-  time = '4h'
+  time = {4.h * task.attempt}  // closure: 4 hours multiplied by the task's attempt number
 }
 
 profiles {
@@ -30,9 +30,16 @@ profiles {
     // optimize the number of cpus so that all chunks can be processed in parallel
     process.cpus = 6
     process.executor = 'slurm'
-    process.errorStrategy = { task.exitStatus in [9, 143, 137, 104, 134, 139, 247] ? 'retry' : 'finish' }
+    process.errorStrategy = 'retry'  // retry on any failure, no longer only on the exit statuses listed before
     process.maxErrors = '-1'
-    process.maxRetries = 3
+    process.maxRetries = 1  // lowered from 3
+    process.queue = 'cpu'  // queue name passed to the 'slurm' executor set above
+    executor {  // settings for the job executor used by this profile
+        queueSize = 1000  // number of tasks the executor handles in parallel
+        pollInterval = '1min'  // how often task termination is checked
+        queueStatInterval = '5min'  // how often the queue status is fetched
+        submitRateLimit = '10sec'  // job submission rate limit; the key is case-sensitive ('submitRatelimit' is not a recognized setting)
+    }
   }
 }
 

diff --git a/config/nxf_vcf.config b/config/nxf_vcf.config
@@ -18,18 +18,18 @@ env {
 process {
   withLabel: 'vcf_validate' {
     memory = '100MB'
-    time = '30m'
+    time = {30.m * task.attempt}  // 30 minutes multiplied by the task's attempt number
   }
 
   withLabel: 'vcf_split' {
     memory = '100MB'
-    time = '30m'
+    time = {30.m * task.attempt}  // 30 minutes multiplied by the task's attempt number
   }
 
   withLabel:'vcf_annotate' {
     cpus = 4
-    memory = '8GB'
-    time = '4h'
+    memory = {16.GB * task.attempt}  // base raised from 8GB to 16GB, multiplied by the task's attempt number
+    time = {4.h * task.attempt}  // 4 hours multiplied by the task's attempt number
   }
 
   withLabel:'vcf_classify|vcf_classify_samples|vcf_inheritance' {