From 6adfa514838b710dc50a4ce24022418f64fb2b2d Mon Sep 17 00:00:00 2001 From: RAHenriksen Date: Thu, 2 Jan 2025 11:34:46 +0100 Subject: [PATCH] update --- bifrost_chewbbaca/pipeline_blat.smk | 2 ++ bifrost_chewbbaca/rule__mmseqs_genecall.py | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/bifrost_chewbbaca/pipeline_blat.smk b/bifrost_chewbbaca/pipeline_blat.smk index 44c995c..b8026d6 100755 --- a/bifrost_chewbbaca/pipeline_blat.smk +++ b/bifrost_chewbbaca/pipeline_blat.smk @@ -119,6 +119,8 @@ rule blat_gene_call: gene_call_results = directory(f"{component['name']}/blat_gene_call_results"), gene_calls = f"{component['name']}/blat_gene_call_results/gene_calls.fa", gene_call_done = f"{component['name']}/blat_gene_call_done" + message: + f"Running step:{rule_name}, outputs: {component['name']}/blat_gene_call_results" script: os.path.join(os.path.dirname(workflow.snakefile), "rule__blat_genecall.py") diff --git a/bifrost_chewbbaca/rule__mmseqs_genecall.py b/bifrost_chewbbaca/rule__mmseqs_genecall.py index df6b534..78c1a9a 100755 --- a/bifrost_chewbbaca/rule__mmseqs_genecall.py +++ b/bifrost_chewbbaca/rule__mmseqs_genecall.py @@ -78,23 +78,27 @@ def parse_mmseqs_output(output_tsv, assembly_sequences): return alleles -def run_mmseqs_and_parse(query_fa, db_fa, output_tsv, assembly_sequences): +def run_mmseqs_and_parse(query_fa, db, output_tsv, assembly_sequences): """ Runs mmseqs and processes the outputs. 
""" # Define the mmseqs command mmseqs_cmd = [ - "mmseqs easy-search", - db_fa, + "mmseqs", + "easy-linsearch", query_fa, + db, output_tsv, "tmp", - "--search-type",'4', - "--format-mode",'0' - "-format-output",'query,target,tlen,pident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits' - "--min-seq-id",0.9 + "--search-type",'3', + "--format-mode",'0', + "--format-output",'query,target,tlen,pident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits', + "--min-seq-id",'0.9', + "--threads",'6', + "--remove-tmp-files",'1', ] - # mmseqs easy-search assemblatron__v2.3.3/test_cdiff_single___2405W4378.fasta all_Clostridioides_loci.fa alnRes.tsv tmp --search-type 4 --format-mode 0 --format-output "query,target,tlen,pident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits" --min-seq-id 0.9 + print(f"msmeqs commands {mmseqs_cmd}") + #mmseqs easy-linsearch assemblatron__v2.3.3/test_cdiff_single___2405W4378.fasta all_Clostridioides_loci_db alnRes.tsv tmp --search-type 3 --threads 6 --min-seq-id 0.9 --remove-tmp-files 1 --format-mode 0 --format-output "query,target,qlen,pident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits" # Run mmseqs try: