Skip to content

Commit

Permalink
Fixing steps in hqtb #7
Browse files Browse the repository at this point in the history
  • Loading branch information
niinina committed Oct 1, 2024
1 parent f5fcaed commit f3b15a1
Show file tree
Hide file tree
Showing 12 changed files with 182 additions and 163 deletions.
6 changes: 4 additions & 2 deletions src/specimen/data/config/hqtb_advanced_config_expl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,15 @@ parameters:
generate_draft_model:
# if the gene ids of the template model do not match the
# identifiers of the annotated genome file, use this option to edit them
edit_names: no
edit_names: "no"
# filter for genes based on percentage identity value
pid: 80.0
# Which medium to save the model with.
medium: default

refinement_extension:
# Id to use for checking (change not advised)
id: locus_tag
id: null
# change the sensitivity of DIAMOND
sensitivity: more-sensitive
coverage: 95.0
Expand All @@ -90,6 +90,7 @@ parameters:
# include for knowledge-based model)
exclude_dna: True
exclude_rna: True
email: USER

refinement_cleanup:
# Options for duplicate checking/removal
Expand All @@ -100,6 +101,7 @@ parameters:
remove_dupl_meta: True
# Optional for gapfilling (input is a media config file, refineGEMs standart)
media_gap: null
growth_threshold: 0.05

refinement_annotation:
# for KEGG pathway annotation
Expand Down
3 changes: 2 additions & 1 deletion src/specimen/data/config/hqtb_basic_config_expl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ parameters:
generate_draft_model:
# if the gene ids of the template model do not match the
# identifiers of the annotated genome file, use this option to edit them
edit_names: no
edit_names: "no"
pid: 80.0

refinement_extension:
# set the sensitivity for DIAMOND
sensitivity: more-sensitive
coverage: 95.0
pid: 90.0
email: USER

refinement_cleanup:
# current default means no gapfilling
Expand Down
8 changes: 5 additions & 3 deletions src/specimen/data/config/hqtb_config_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,13 @@ parameters:
sensitivity: more-sensitive

generate_draft_model:
edit_names: no
edit_names: "no"
pid: 80.0
medium: default

refinement_extension:
# default (usually) fine
id: locus_tag
id: null
# default fine
sensitivity: more-sensitive
# default alright but good to edit for trying different options
Expand All @@ -84,16 +84,18 @@ parameters:
# default almost needed, except for special cases
exclude_dna: True
exclude_rna: True
email: USER

refinement_cleanup:
# default as standart
# default as standard
check_dupl_reac: True
check_dupl_meta: default
remove_unused_meta: False
remove_dupl_reac: True
remove_dupl_meta: True
# current default means no gapfilling
media_gap: null
growth_threshold: 0.05

refinement_annotation:
# for KEGG pathway annotation
Expand Down
2 changes: 1 addition & 1 deletion src/specimen/hqtb/core/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def run(model_path:str, dir:str,
print('Given directory already has required structure.')

# load model
model = load_model(model_path,'cobra')
model = load_model(str(model_path),'cobra')

# ------------------
# general statistics
Expand Down
11 changes: 6 additions & 5 deletions src/specimen/hqtb/core/bidirectional_blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def extract_cds(file: str, name: str, dir: str, collect_info: list, identifier:
):

for record in SeqIO.parse(f,"genbank"):
x = 1
if record.features:
for feature in record.features:

Expand Down Expand Up @@ -132,14 +133,14 @@ def create_diamond_db(dir: str, name: str, path: str, threads: int):
"""

# check if database already exists
if os.path.isfile(Path(dir,'DIAMONDdb',name+'.dmnd')):
if os.path.isfile(Path(dir,'db',name+'.dmnd')):
print(F'database for {name} already exists')
else:
# generate new database using diamond makedb
print(F'create DIAMOND database for {name} using:')
print(F'diamond makedb --in {path} -d {str(Path(dir,"db",name))} -p {int(threads)}')
print(F'diamond makedb --in {path} -d {str(Path(dir,"db",name+".dmnd"))} -p {int(threads)}')
start = time.time()
subprocess.run([F'diamond makedb --in {path} -d {str(Path(dir,"db",name))} -p {int(threads)}'], shell=True)
subprocess.run(["diamond", "makedb", "--in", path, "-d", str(Path(dir,"db",name+".dmnd")), "-p", str(threads)], shell=True)
end = time.time()
print(F'\t time: {end - start}s')

Expand Down Expand Up @@ -172,7 +173,7 @@ def run_diamond_blastp(dir: str, db: str, query: str, fasta_path:str , sensitivi
print(F'blast {query} against {db} using:')
print(F'diamond blastp -d {str(Path(dir,"db/",db+".dmnd"))} -q {F"{fasta_path}"} --{sensitivity} -p {int(threads)} -o {outname} --outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen ')
start = time.time()
subprocess.run([F'diamond blastp -d {str(Path(dir,"db",db+".dmnd"))} -q {F"{fasta_path}"} --{sensitivity} -p {int(threads)} -o {outname} --outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen '], shell=True)
subprocess.run(["diamond", "blastp", "-d", str(Path(dir,"db",db+".dmnd")), "-q", fasta_path, "--"+sensitivity, "-p", str(threads), "-o", outname, "--outfmt", "6", "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore", "qlen"], shell=True)
end = time.time()
print(F'\ttime: {end - start}s')

Expand Down Expand Up @@ -309,7 +310,7 @@ def run(template:str, input:str, dir:str,
Defaults to None.
- temp_header (str, optional):
Feature qualifier of the gbff (NCBI) / faa (PROKKA) of the template to use as header for the FASTA files.
If None is given, sets it based on file extension (currently only implememted for gbff and faa).
If None is given, sets it based on file extension (currently only implemented for gbff and faa).
Defaults to 'protein_id'.
- in_header (str, optional):
Feature qualifier of the gbff (NCBI) / faa (PROKKA) of the input to use as header for the FASTA files.
Expand Down
5 changes: 2 additions & 3 deletions src/specimen/hqtb/core/generate_draft_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,6 @@ def run(template:str, bpbbh:str, dir:str,
# -----------
# check input
# -----------

if edit_names not in ['no','dot-to-underscore']:
raise ValueError(F'Edit_names value {edit_names} not in list of allowed values: no, dot-to-underscore')

Expand All @@ -403,7 +402,7 @@ def run(template:str, bpbbh:str, dir:str,
if len(growth_objfunc) == 1:
template_model.objective = growth_objfunc[0]
elif len(growth_objfunc) > 1:
mes = f'Multiple BOF detected. Chosing the following: {growth_objfunc[0]}'
mes = f'Multiple BOF detected. Choosing the following: {growth_objfunc[0]}'
warnings.warn(mes, category=UserWarning)
template_model.objective = growth_objfunc[0]
else:
Expand Down Expand Up @@ -435,7 +434,7 @@ def run(template:str, bpbbh:str, dir:str,
# -------------------

if memote:
memote_path = Path(dir,'step1-extension',name+'.html')
memote_path = str(Path(dir,name+'.html'))
run_memote(draft, 'html', return_res=False, save_res=memote_path, verbose=True)

total_time_e = time.time()
Expand Down
20 changes: 13 additions & 7 deletions src/specimen/hqtb/core/refinement/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,15 @@ def kegg_reaction_to_kegg_pathway(model:cobra.Model, viaEC:bool=False, viaRC:boo

# via reaction
try:
reaction = kegg_reaction_parser(reac.annotation['kegg.reaction'])
if 'db' in reaction and 'kegg.pathway' in reaction['db']:
pathways = reaction['db']['kegg.pathway']
if isinstance(reac.annotation['kegg.reaction'],list):
for annotation in reac.annotation['kegg.reaction']:
reaction = kegg_reaction_parser(annotation)
if reaction is not None and 'db' in reaction and 'kegg.pathway' in reaction['db']:
pathways.append(reaction['db']['kegg.pathway'])
else:
reaction = kegg_reaction_parser(reac.annotation['kegg.reaction'])
if reaction is not None and 'db' in reaction and 'kegg.pathway' in reaction['db']:
pathways = reaction['db']['kegg.pathway']
except urllib.error.HTTPError:
print(F'HTTPError: {reac.id}, {reac.annotation["kegg.reaction"]}')
except ConnectionResetError:
Expand Down Expand Up @@ -175,13 +181,13 @@ def run(model:str, dir:str, kegg_viaEC:bool=False,
libsbml_model = libsbml_doc.getModel()

libsbml_model = run_SBOannotator(libsbml_model)
write_model_to_file(libsbml_model, Path(dir,'step3-annotation',libsbml_model.getId()+'_SBOannotated.xml'))
write_model_to_file(libsbml_model, str(Path(dir,'step3-annotation',libsbml_model.getId()+'_SBOannotated.xml')))

end = time.time()
print(F'\ttime: {end - start}s')

# reload model
model = load_model(Path(dir,'step3-annotation',libsbml_model.getId()+'_SBOannotated.xml'), 'cobra')
model = load_model(str(Path(dir,'step3-annotation',libsbml_model.getId()+'_SBOannotated.xml')), 'cobra')

# ................................................................
# @EXTENDABLE
Expand All @@ -204,7 +210,7 @@ def run(model:str, dir:str, kegg_viaEC:bool=False,
# ----------
# save model
# ----------
cobra.io.write_sbml_model(model, Path(dir,'step3-annotation',{model.id}+'_annotated.xml'))
cobra.io.write_sbml_model(model, Path(dir,'step3-annotation',model.id+'_annotated.xml'))

# ---------------------------------
# assess model quality using memote
Expand All @@ -213,7 +219,7 @@ def run(model:str, dir:str, kegg_viaEC:bool=False,
if memote:
start = time.time()
name = model.id
memote_path = Path(dir,'step3-annotation',name+'_annotated.html')
memote_path = str(Path(dir,'step3-annotation',name+'_annotated.html'))
run_memote(model, 'html', return_res=False, save_res=memote_path, verbose=True)
end = time.time()
print(F'\ttotal time: {end - start}s')
2 changes: 1 addition & 1 deletion src/specimen/hqtb/core/refinement/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,5 +499,5 @@ def run(model:str, dir:str,
# ---------------------------------

if memote:
memote_path = Path(dir,'step2-clean-up',name+'.html')
memote_path = str(Path(dir,'step2-clean-up',name+'.html'))
run_memote(model, 'html', return_res=False, save_res=memote_path, verbose=True)
Loading

0 comments on commit f3b15a1

Please sign in to comment.