Skip to content

Commit

Permalink
Update pipeline CMPB #8 #1
Browse files Browse the repository at this point in the history
  • Loading branch information
cb-Hades committed May 22, 2024
1 parent 9d2098e commit 5d73813
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 200 deletions.
173 changes: 157 additions & 16 deletions src/specimen/cmpb/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,74 +15,215 @@
from datetime import date
from pathlib import Path

import warnings
import yaml

from refinegems.curation.pathways import kegg_pathway_analysis
from refinegems.classes.reports import ModelInfoReport
from refinegems.curation.biomass import test_biomass_presence
from refinegems.analysis import growth
from refinegems.utility.connections import run_memote, perform_mcc, adjust_BOF
from refinegems.curation.curate import resolve_duplicates
from refinegems.curation.pathways import kegg_pathways

# from SBOannotator import *
from SBOannotator import sbo_annotator

from ..util.set_up import save_cmpb_user_input

################################################################################
# functions
################################################################################

def run():
def run(configpath:str):

# setup phase
#############

# load config
# -----------
if not configpath:
config = save_cmpb_user_input(Path(dir, 'config_user.yaml'))
else:
with open(configpath, "r") as cfg:
config = yaml.load(cfg, Loader=yaml.loader.FullLoader)

# create log
# ----------
today = date.today().strftime("%Y%m%d")
log_file = Path(config["out_path"],f'rg_{str(today)}.log')

# ....................................................
# @TODO / @IDEAS
# global options
# run memote after every step
# calculate model stats after each step
# use temp folder or report all model/in-between steps
# what to write in the log file
# ....................................................

# CarveMe
# -------
#########
# @TODO
# in a future update
# will come in a future update
if not config['input']['modelpath']:
# run CarveMe
raise ValueError('Currently, CarveMe has not been included in the pipeline. Please use it separatly.mThis wfunction will be provided in a future update.')

# CarveMe correction
# ------------------
####################
libmodel


# check, if input is a CarveMe model
# rg.polish
# polish(model: libModel, email: str, id_db: str, refseq_gff: str,
# protein_fasta: str, lab_strain: bool, kegg_organism_id: str, path: str)
# rg correct charges
#

# growth test
# -----------
model
media_path
namespace
# try to set objective to growth
growth_func_list = test_biomass_presence(model)
if growth_func_list:
# independently of how many growth functions are found, the first one will be used
model.objective = growth_func_list[0]
# simulate growth on different media
growth_report = growth.growth_analysis(model, media_path,
namespace=namespace, retrieve='report')
growth_report.save(Path(dir,'growth')) # @TODO adjust Path, just a placeholder really

else:
warnings.warn('No growth/biomass function detected, growth simulation before gapfilling will be skipped.')


# gapfilling
# ----------
############
# options: automatic/manual extension/manual input

# ModelPolisher
# -------------
###############

# Annotations
# -----------
#############
model
media_path
namespace

# KEGGPathwayGroups, optional
# -----------------
modelpath
new_libmodel, missing_list = kegg_pathways(modelpath)

# SBOannotator
# ------------
# @TODO
# theoretically, something along these lines:
libsbml_doc = readSBML(model)
libsbml_model = libsbml_doc.getModel()
sbo_annotator(libsbml_doc, libsbml_model, 'constraint-based', True, 'create_dbs',
Path(dir,'step3-annotation',libsbml_model.getId()+'_SBOannotated.xml'))


# growth test
# -----------
# try to set objective to growth
growth_func_list = test_biomass_presence(model)
if growth_func_list:
# independently of how many growth functions are found, the first one will be used
model.objective = growth_func_list[0]
# simulate growth on different media
growth_report = growth.growth_analysis(model, media_path,
namespace=namespace, retrieve='report')
growth_report.save(Path(dir,'growth')) # @TODO adjust Path, just a placeholder really

else:
warnings.warn('No growth/biomass function detected, growth simulation after annotation will be skipped.')


# model cleanup
# -------------
###############
model

# duplicates
# BOFdat?
# mcc
# ----------
# @TODO which params to set and which to set as optional input?
resolve_duplicates(model, check_reac:bool=True,
check_meta:Literal['default','exhaustive','skip']='default',
replace_dupl_meta:bool=True, remove_unused_meta:bool=False,
remove_dupl_reac:bool=True)


# BOF
# ---
# @TODO
# BOFdat - optional

# check and normalise

# MCC
# ---
# @TODO
model = perform_mcc(model, Path(dir,'mcc'),apply=True) # @TODO Path is just a placeholder

# analysis
# --------
##########
# @TODO
# set / get params from config or upstream pipeline
dir
model

namespace
media_path

# stats
# -----
stats_report = ModelInfoReport(model)
stats_report.save(Path(dir,'stats')) # adjust Path, just a placeholder really
stats_report.save(Path(dir,'stats')) # @TODO adjust Path, just a placeholder really

# kegg pathway
# ------------
pathway_report = kegg_pathway_analysis(model)
pathway_report.save(Path(dir,'kegg_pathway')) # adjust Path, just a placeholder really
pathway_report.save(Path(dir,'kegg_pathway')) # @TODO adjust Path, just a placeholder really

# sbo terms
# sbo term
# --------
# @TODO
# plot_rea_sbo_single(model: libModel) -> fig?

# memote
# ------
run_memote(model, 'html', save_res=Path(dir,'final_memote.html'))

# growth
# ------
# try to set objective to growth
growth_func_list = test_biomass_presence(model)
if growth_func_list:
# independently of how many growth functions are found, the first one will be used
model.objective = growth_func_list[0]
# simulate growth on different media
growth_report = growth.growth_analysis(model, media_path,
namespace=namespace, retrieve='report')
growth_report.save(Path(dir,'growth')) # @TODO adjust Path, just a placeholder really

else:
warnings.warn('No growth/biomass function detected, final growth simulation will be skipped.')

# auxotrophies
# ------------
media_list = growth.read_media_config(media_path)
auxo_report = growth.test_auxotrophies(model, media_list[0], media_list[1], namespace)
auxo_report.save(Path(dir,'auxotrophies')) # @TODO adjust Path, just a placeholder really






pass

###########
# old stuff
Expand Down
94 changes: 55 additions & 39 deletions src/specimen/data/config/cmpb_config.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,58 @@
Description: >
This file can be adapted to choose what refineGEMs should do.
Note: For windows use \ instead of / for the paths
General Setting: >
Path to GEM to be investigated
model: 'data/e_coli_core.xml'
# Set the out path for all analysis files
out_path: ''

Settings for scripts that investigate the model: >
These are only necessary if none of the scripts to manipulate the model are used.
# Set to TRUE if you want pngs that aid in model investigation, will be saved to a folder called 'visualization'
visualize: TRUE

# Set the path to a medium config for growth simulation
mediapath: 'media_config.yaml'

# Namespace to use for the model
namespace: 'BiGG'

# Settings if you want to compare multiple models
multiple: FALSE
multiple_paths: # enter as many paths as you need below
- 'data/e_coli_core.xml'
- ''
- ''
single: TRUE # set to False if you only want to work with the multiple models

# Determine whether the biomass function should be checked & normalised
biomass: TRUE

# determine whether the memote score should be calculated, default: FALSE
memote: FALSE
# Configuration file for the SPECIMEN CMPB pipeline
# parameters with the value __USER__ are required to be specified by the user

# meta info:
# model: __USER__
# organism: __USER__
# date: __USER__
# author: __USER__

# input for the pipeline
# ----------------------
input:
modelpath: NULL # optional, path to a model.
# If not given, runs CarveMe
annotated_genome: __USER__ # required, path to the annotated genome file
namespace: BiGG # namespace to use for the model
mediapath: __USER__ # path to a media config to test growth with

# general options
# ---------------
general:
dir: SPECIMEN-CMPB # Path/Name of a directory to save output to
memote_always_on: False # run memote after every step
stats_always_on: False # calculate the model statistics after every step

# part-specific options
# ---------------------

# add KEGG pathways as groups
kegg_pathway_groups: True

# resolve duplicates
duplicates:
# three possible options for resolving duplicates of the following model entities:
# - check: check for duplicates and simply report them
# - remove: check for and remove duplicates from the model (if possible)
# - skip: skip the duplicate resolution
reactions: remove
metabolites: remove
# additionally remove unused metabolites (reduces possible knowledge base)
remove_unused_metabs: False

# BOFdat / Biomass objective function
BOF:
run_bofdat:
# @TODO

# gapfilling
gapfilling:
# @TODO


##################
# old stuff below
##################

# compare metabolites to the ModelSEED database
modelseed: FALSE # set to False if not needed
Expand All @@ -48,8 +66,6 @@ entrez_email: '' # necessary to access NCBI API
organismid: 'cstr' # Needs to be specified for db_to_compare='KEGG' for the gap_analysis, Can be provided for polish
gff_file: 'data/cstr.gff' # Path to RefSeq GFF file: Required for db_to_compare='KEGG', Can be provided for polish

### Addition of KEGG Pathways as Groups ###
keggpathways: FALSE

### SBO-Term Annotation ###
sboterms: FALSE
Expand Down
2 changes: 1 addition & 1 deletion src/specimen/hqtb/core/generate_draft_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from refinegems.utility.io import load_model
from refinegems.utility.entities import resolve_compartment_names
from refinegems.curation.biomass import test_biomass_presence
from refinegems.analysis.investigate import run_memote
from refinegems.utility.connections import run_memote

from refinegems.analysis.growth import MIN_GROWTH_THRESHOLD

Expand Down
2 changes: 1 addition & 1 deletion src/specimen/hqtb/core/refinement/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

# refinegems
from refinegems.utility.io import load_model, kegg_reaction_parser
from refinegems.analysis.investigate import run_memote
from refinegems.utility.connections import run_memote

# from SBOannotator import *
from SBOannotator import sbo_annotator
Expand Down
2 changes: 1 addition & 1 deletion src/specimen/hqtb/core/refinement/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from refinegems.utility.io import load_model
from refinegems.classes.medium import medium_to_model, Medium
from refinegems.analysis.growth import read_media_config
from refinegems.analysis.investigate import run_memote
from refinegems.utility.connections import run_memote
from refinegems.curation.curate import resolve_duplicates, complete_BioMetaCyc

################################################################################
Expand Down
2 changes: 1 addition & 1 deletion src/specimen/hqtb/core/refinement/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

from refinegems.utility.io import kegg_reaction_parser, load_a_table_from_database
from refinegems.utility.entities import create_random_id, get_reaction_annotation_dict, match_id_to_namespace
from refinegems.analysis.investigate import run_memote
from refinegems.utility.connections import run_memote

# further required programs:
# - DIAMOND, tested with version 0.9.14 (works only for certain sensitivity mode)
Expand Down
Loading

0 comments on commit 5d73813

Please sign in to comment.