Skip to content

Commit

Permalink
Adjust the modelname in CMPB #20
Browse files Browse the repository at this point in the history
  • Loading branch information
niinina committed Dec 28, 2024
1 parent fa7d40c commit 3b95aa8
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 12 deletions.
31 changes: 22 additions & 9 deletions src/specimen/cmpb/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
# requirements
################################################################################

import logging
import pandas as pd
from datetime import date
import logging
import model_polisher as mp
import os
import pandas as pd
from pathlib import Path
from typing import Union

import warnings

from cobra import Reaction,Model
from libsbml import readSBML
import subprocess

from refinegems.analysis import growth
from refinegems.analysis.investigate import plot_rea_sbo_single
Expand Down Expand Up @@ -130,11 +130,16 @@ def between_analysis(model: Model, cfg:dict, step:str):
if not configpath:
config = save_cmpb_user_input()
else:
config = validate_config(configpath, 'cmpb')
config = validate_config(configpath, 'cmpb')

if not config['carveme']['modelname']:
modelname = 'i'+config['general']['organism']+config['general']['strainid']+config['general']['authorinitials']+str(date.today().year).removeprefix('20')
else:
modelname = config['carveme']['modelname']

dir = config['general']['dir']
if not config['general']['save_all_models']:
only_modelpath = Path(dir,'cmpb_out','model.xml')
only_modelpath = Path(dir,'cmpb_out',f'{modelname}.xml')

# create directory structure
# --------------------------
Expand Down Expand Up @@ -169,10 +174,18 @@ def between_analysis(model: Model, cfg:dict, step:str):
#########
if not config['input']['modelpath']:
if config['carveme']['gram'] == "grampos" or config['carveme']['gram'] == "gramneg":
subprocess.run(["carve", config['general']['protein_fasta'], "--solver", "scip", '-u', config['carveme']['gram'], "-o", dir+r"\cmpb_out\models\Draft.xml"])
os.system(f"carve {config['general']['protein_fasta']} --solver scip -u {config['carveme']['gram']} -o {dir}\cmpb_out\models\{modelname}.xml")
# try:
# subprocess.run(["carve", config['general']['protein_fasta'], "--solver", "scip", '-u', config['carveme']['gram'], "-o", dir+f"\cmpb_out\models\{modelname}.xml"], shell=True, check=True, text=True)
# except subprocess.CalledProcessError as e:
# print(f"Error with the execution of CarveMe: {e}")
else:
subprocess.run(["carve", config['general']['protein_fasta'], "--solver", "scip", "-o", dir+r"\cmpb_out\models\Draft.xml"])
config['input']['modelpath'] = dir+r'\cmpb_out\models\Draft.xml'
os.system(f"carve {config['general']['protein_fasta']} --solver scip -o {dir}\cmpb_out\models\{modelname}.xml")
# try:
# subprocess.run(["carve", config['general']['protein_fasta'], "--solver", "scip", "-o", dir+f"\cmpb_out\models\{modelname}.xml"], shell=True, check=True, text=True)
# except subprocess.CalledProcessError as e:
# print(f"Error with the execution of CarveMe: {e}")
config['input']['modelpath'] = dir+fr'\cmpb_out\models\{modelname}.xml'
current_modelpath = config['input']['modelpath']

# CarveMe correction
Expand Down Expand Up @@ -443,7 +456,7 @@ def between_analysis(model: Model, cfg:dict, step:str):
case _:
solver = egcs.EGCSolver()
logger.info(f'\tFound EGCs:\n')
logger.info(f'\t{solver.find_egcs(current_model,with_reacs=True,namespace=config['general']['namespace'])}') # @NOTE automatically uses c,p as compartments
logger.info(f'\t{solver.find_egcs(current_model,with_reacs=True,namespace=config["general"]["namespace"])}') # @NOTE automatically uses c,p as compartments

# BOF
# ---
Expand Down
8 changes: 6 additions & 2 deletions src/specimen/data/config/cmpb_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ input:
# ---------------
general:
dir: './' # Path/Name of a directory to save output to
authorinitials: USER # Initials or abbreviation of the author, used for naming the model
strainid: USER # ID of the strain
organism: USER # Abbreviation for the organism
colours: 'YlGn' # Set the colour scheme for the plots
# should be a valid matplotlib continuous color palette
namespace: BiGG # Namespace to use for the model
Expand Down Expand Up @@ -53,8 +56,9 @@ carveme:
# CarveMe requires protein_fasta under general to be set instead of modelpath
# if CarveMe should be run,
# fill out the params below
modelname: USER # Name of the model; if not set, a name is built as i<organism><strainid><authorinitials><two-digit year>
gram: USER # Choose either grampos or gramneg, depending on the Gram-test
# resilts of your organism
# results of your organism

# Polish a CarveMe model
# Only necessary if the model will be or has been built with CarveMe
Expand Down Expand Up @@ -100,7 +104,7 @@ gapfilling:
swissprot-dmnd: USER # Path to the SwissProt DIAMOND database file.
swissprot-mapping: USER # Path to the SwissProt mapping file (against EC / BRENDA)
check-NCBI: False # Enable checking NCBI accession numbers for EC numbers - time costly.
sensitivity: 'more-sensitiv' # Sensitivity option for the DIAMOND run.
sensitivity: 'more-sensitive' # Sensitivity option for the DIAMOND run.
coverage: 90.0 # Coverage (parameter for DIAMOND).
percentage identity: 90.0 # Percentage identity threshold value for accepting
# matches found by DIAMOND as homologous.
Expand Down
16 changes: 15 additions & 1 deletion src/specimen/util/set_up.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,10 @@ def dict_recursive_check(dictA:dict, key:str=None,
else:
dict_recursive_check(combined_config, key=None, pipeline=pipeline)

if pipeline=='cmpb':
if combined_config['carveme']['modelname'] is None and (combined_config['general']['authorinitials'] is None or combined_config['general']['organism'] is None or combined_config['general']['strainid'] is None):
raise ValueError(f'Either the model name or all of the following parameters must be stated: authorinitials, organism and strainID')

return combined_config


Expand Down Expand Up @@ -435,6 +439,16 @@ def save_cmpb_user_input(configpath:Union[str,None]=None) -> dict:

# output directory
config['general']['dir'] = click.prompt('Enter your desired output directory path', type=click.Path())

# name for the model
modelname = click.prompt('Do you have a specific name for your model?', type=click.Choice(['y','n']), show_choices=True)
match modelname:
case 'y':
config['carveme']['modelname'] = click.prompt('Please enter your desired name for the model', type=str)
case 'n':
config['general']['authorinitials'] = click.prompt('An automated name based on the pattern iOrganismStrainAuthorYear will be created. \n Please enter your intials.', type=str)
config['general']['organism'] = click.prompt('Please enter an abbreviation for your organism.', type=str)
config['general']['strainid'] = click.prompt('Please enter the ID for your strain.', type=str)

# colour
set_col = click.prompt('Do you want to use the default colour map YlGn for the visualisation?', type=click.Choice(['y','n']), show_choices=True)
Expand Down Expand Up @@ -471,7 +485,7 @@ def save_cmpb_user_input(configpath:Union[str,None]=None) -> dict:

# some additional, sometimes required, sometimes optional files
refseq = click.prompt('If you want to run a gap analysis with KEGG or have a CarveMe model, please enter the path to your refseq gff file', type=click.Path())
config['general']['refseq_organism_id'] = refseq
config['general']['gff'] = refseq

kegg_org_id = click.prompt('If you want to run a gap analysis with KEGG, please enter the KEGG organism ID')
config['general']['kegg_organism_id'] = kegg_org_id
Expand Down

0 comments on commit 3b95aa8

Please sign in to comment.