Adjust the modelname in CMPB #20

draeger-lab · Dec 28, 2024 · 3b95aa8 · 3b95aa8
1 parent fa7d40c
commit 3b95aa8
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 12 deletions.
diff --git a/src/specimen/cmpb/workflow.py b/src/specimen/cmpb/workflow.py
@@ -8,18 +8,18 @@
 # requirements
 ################################################################################
 
-import logging
-import pandas as pd
 from datetime import date
+import logging
 import model_polisher as mp
+import os
+import pandas as pd
 from pathlib import Path
 from typing import Union
 
 import warnings
 
 from cobra import Reaction,Model
 from libsbml import readSBML
-import subprocess
 
 from refinegems.analysis import growth
 from refinegems.analysis.investigate import plot_rea_sbo_single
@@ -130,11 +130,16 @@ def between_analysis(model: Model, cfg:dict, step:str):
     if not configpath:
         config = save_cmpb_user_input()
     else:
-        config = validate_config(configpath, 'cmpb') 
+        config = validate_config(configpath, 'cmpb')
+
+    if not config['carveme']['modelname']:
+        modelname = 'i'+config['general']['organism']+config['general']['strainid']+config['general']['authorinitials']+str(date.today().year).removeprefix('20')
+    else:
+        modelname = config['carveme']['modelname']
 
     dir = config['general']['dir']
     if not config['general']['save_all_models']:
-        only_modelpath = Path(dir,'cmpb_out','model.xml') 
+        only_modelpath = Path(dir,'cmpb_out',f'{modelname}.xml') 
 
     # create directory structure
     # --------------------------
@@ -169,10 +174,18 @@ def between_analysis(model: Model, cfg:dict, step:str):
     #########
     if not config['input']['modelpath']:
         if config['carveme']['gram'] == "grampos" or config['carveme']['gram'] == "gramneg":
-            subprocess.run(["carve", config['general']['protein_fasta'], "--solver", "scip", '-u', config['carveme']['gram'], "-o", dir+r"\cmpb_out\models\Draft.xml"])
+            os.system(f"carve {config['general']['protein_fasta']} --solver scip -u {config['carveme']['gram']} -o {dir}\cmpb_out\models\{modelname}.xml")
+            # try:
+            #     subprocess.run(["carve", config['general']['protein_fasta'], "--solver", "scip", '-u', config['carveme']['gram'], "-o", dir+f"\cmpb_out\models\{modelname}.xml"], shell=True, check=True, text=True)
+            # except subprocess.CalledSystemError as e:
+            #     print(f"Error with the execution of CarveMe: {e}")
         else: 
-            subprocess.run(["carve", config['general']['protein_fasta'], "--solver", "scip", "-o", dir+r"\cmpb_out\models\Draft.xml"])
-        config['input']['modelpath'] = dir+r'\cmpb_out\models\Draft.xml'
+            os.system(f"carve {config['general']['protein_fasta']} --solver scip -o {dir}\cmpb_out\models\{modelname}.xml")
+            # try:
+            #     subprocess.run(["carve", config['general']['protein_fasta'], "--solver", "scip", "-o", dir+f"\cmpb_out\models\{modelname}.xml"], shell=True, check=True, text=True)
+            # except subprocess.CalledSystemError as e:
+            #     print(f"Error with the execution of CarveMe: {e}")
+        config['input']['modelpath'] = dir+fr'\cmpb_out\models\{modelname}.xml'
     current_modelpath = config['input']['modelpath']
 
     # CarveMe correction
@@ -443,7 +456,7 @@ def between_analysis(model: Model, cfg:dict, step:str):
         case _:
             solver = egcs.EGCSolver()
             logger.info(f'\tFound EGCs:\n')
-            logger.info(f'\t{solver.find_egcs(current_model,with_reacs=True,namespace=config['general']['namespace'])}') # @NOTE automatically uses c,p as compartments 
+            logger.info(f'\t{solver.find_egcs(current_model,with_reacs=True,namespace=config["general"]["namespace"])}') # @NOTE automatically uses c,p as compartments 
 
     # BOF
     # ---

diff --git a/src/specimen/data/config/cmpb_config.yaml b/src/specimen/data/config/cmpb_config.yaml
@@ -22,6 +22,9 @@ input:
 # ---------------
 general:
   dir: './'                  # Path/Name of a directory to save output to
+  authorinitials: USER       # Initials or abbreviation of the author for naming the model
+  strainid: USER             # ID of the strain 
+  organism: USER             # Abbreviation for the organism
   colours: 'YlGn'            # Set the colour scheme for the plots
                              # should be a valid matplotlib continuous color palette
   namespace: BiGG            # Namespace to use for the model
@@ -53,8 +56,9 @@ carveme:
   # CarveMe requires protein_fasta under general to be set instead of modelpath
   # if CarveMe should be run, 
   # fill out the params below
+  modelname: USER      # Name of the model
   gram: USER           # Choose either grampos or gramneg, depending on the Gram-test
-                       # resilts of your organism
+                       # results of your organism
 
 # Polish a CarveMe model
 #    Only neccessary, if the model will or has been build with CarveMe
@@ -100,7 +104,7 @@ gapfilling:
     swissprot-dmnd: USER          # Path to the SwissProt DIAMOND database file.
     swissprot-mapping: USER       # Path to the SwissProt mapping file (against EC / BRENDA)
     check-NCBI: False             # Enable checking NCBI accession numbers for EC numbers - time costly.
-    sensitivity: 'more-sensitiv'  # Sensitivity option for the DIAMOND run.
+    sensitivity: 'more-sensitive' # Sensitivity option for the DIAMOND run.
     coverage: 90.0                # Coverage (parameter for DIAMOND).
     percentage identity: 90.0     # Percentage identity threshold value for accepting
                                   # matches found by DIAMOND as homologous.

diff --git a/src/specimen/util/set_up.py b/src/specimen/util/set_up.py
@@ -384,6 +384,10 @@ def dict_recursive_check(dictA:dict, key:str=None,
     else:
         dict_recursive_check(combined_config, key=None, pipeline=pipeline)
 
+    if pipeline=='cmpb':
+        if combined_config['carveme']['modelname'] is None and (combined_config['general']['authorinitials'] is None or combined_config['general']['organism'] is None or combined_config['general']['strainid'] is None):
+            raise ValueError(f'Either the model name or all of the following parameters must be stated: authorinitials, organism and strainID')
+
     return combined_config
 
 
@@ -435,6 +439,16 @@ def save_cmpb_user_input(configpath:Union[str,None]=None) -> dict:
 
     # output directory
     config['general']['dir'] = click.prompt('Enter your desired output directory path', type=click.Path())
+
+    # name for the model
+    modelname = click.prompt('Do you have a specific name for your model?', type=click.Choice(['y','n']), show_choices=True)
+    match modelname:
+        case 'y':
+            config['carveme']['modelname'] = click.prompt('Please enter your desired name for the model', type=str)
+        case 'n':
+            config['general']['authorinitials'] = click.prompt('An automated name based on the pattern iOrganismStrainAuthorYear will be created. \n Please enter your intials.', type=str)
+            config['general']['organism'] = click.prompt('Please enter an abbreviation for your organism.', type=str)
+            config['general']['strainid'] = click.prompt('Please enter the ID for your strain.', type=str)
 
     # colour 
     set_col = click.prompt('Do you want to use the default colour map YlGn for the visualisation?', type=click.Choice(['y','n']), show_choices=True)
@@ -471,7 +485,7 @@ def save_cmpb_user_input(configpath:Union[str,None]=None) -> dict:
 
     # some additional, sometimes required, sometimes optional files
     refseq = click.prompt('If you want to run a gap analysis with KEGG or have a CarveMe model, please enter the path to your refseq gff file', type=click.Path())
-    config['general']['refseq_organism_id'] = refseq
+    config['general']['gff'] = refseq
 
     kegg_org_id = click.prompt('If you want to run a gap analysis with KEGG, please enter the KEGG organism ID')
     config['general']['kegg_organism_id'] = kegg_org_id