Commit: formating
SilasK committed Oct 6, 2023
1 parent 3b804fa commit 9739e59
Showing 16 changed files with 50 additions and 76 deletions.
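All of the changes below are pure formatting: comment spacing, spacing around keyword arguments, trailing commas, and blank-line normalization, in the style produced by the black formatter (and snakefmt for the Snakemake files). A sketch of how such a formatting pass might be reproduced locally, assuming those two tools are in use (neither is named in the commit itself):

    # Hypothetical commands; assumes black and snakefmt are installed.
    black atlas/          # format the Python package
    snakefmt workflow/    # format the Snakefile and *.smk rule files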
4 changes: 2 additions & 2 deletions atlas/atlas.py
@@ -9,7 +9,7 @@

from snakemake.io import load_configfile
from .make_config import validate_config
from .init.atlas_init import run_init#, run_init_sra
from .init.atlas_init import run_init # , run_init_sra

from .__init__ import __version__

@@ -66,7 +66,7 @@ def cli(obj):
cli.add_command(run_init)


#cli.add_command(run_init_sra)
# cli.add_command(run_init_sra)


def get_snakefile(file="workflow/Snakefile"):
1 change: 0 additions & 1 deletion atlas/init/atlas_init.py
@@ -62,7 +62,6 @@ def prepare_sample_table_for_atlas(

sample_table["BinGroup"] = "All"


validate_sample_table(sample_table)

sample_table.to_csv(outfile, sep="\t")
2 changes: 0 additions & 2 deletions atlas/make_config.py
@@ -236,8 +236,6 @@ def make_config(
)




def validate_config(config, workflow):
conf = load_configfile(config)

7 changes: 3 additions & 4 deletions workflow/Snakefile
@@ -40,7 +40,7 @@ wildcard_constraints:
include: "rules/sample_table.smk"
include: "rules/download.smk" # contains hard coded variables
include: "rules/qc.smk"
include: "rules/screen.smk" # expects function get_input_fastq defined in qc
include: "rules/screen.smk" # expects function get_input_fastq defined in qc
include: "rules/assemble.smk"
include: "rules/binning.smk"
include: "rules/derep.smk"
@@ -201,9 +201,10 @@ rule qc:
output:
touch("finished_QC"),


rule screen:
input:
"QC/screen/sketch_comparison.tsv.gz"
"QC/screen/sketch_comparison.tsv.gz",


# overwrite commands in rules/download.snakefile
@@ -222,14 +223,12 @@ onerror:

for r in workflow.rules:
if not "mem_mb" in r.resources:

if "mem" in r.resources:
r.resources["mem_mb"] = r.resources["mem"] * 1000
else:
# default
r.resources["mem_mb"] = config["mem"] * 1000


# add time if not present. Simple jobs use simple time

if "time_min" not in r.resources:
14 changes: 5 additions & 9 deletions workflow/rules/assemble.smk
@@ -70,7 +70,6 @@ if SKIP_QC & (len(MULTIFILE_FRACTIONS) < 3):
-Xmx{resources.java_mem}G 2> {log}
"""


else:

localrules:
@@ -97,7 +96,6 @@ else:




#
rule normalize_reads:
input:
@@ -273,7 +271,6 @@ if config.get("assembler", "megahit") == "megahit":
shell:
"cat {input} > {output}"


def megahit_input_parsing(input):
Nfiles = len(input)

@@ -350,7 +347,6 @@ if config.get("assembler", "megahit") == "megahit":
shell:
"cp {input} {output}"


else:
if PAIRED_END:
ASSEMBLY_FRACTIONS = ["R1", "R2"]
@@ -465,16 +461,17 @@ else:
temp("{sample}/assembly/{sample}_raw_contigs.fasta"),
shell:
"cp {input} {output}"
# standardizes header labels within contig FASTAs


# standardizes header labels within contig FASTAs


rule rename_contigs:
input:
"{sample}/assembly/{sample}_raw_contigs.fasta",
output:
fasta="{sample}/assembly/{sample}_prefilter_contigs.fasta",
mapping_table = "{sample}/assembly/old2new_contig_names.tsv"
mapping_table="{sample}/assembly/old2new_contig_names.tsv",
threads: config.get("simplejob_threads", 1)
resources:
mem=config["simplejob_mem"],
@@ -489,8 +486,6 @@
"../scripts/rename_assembly.py"




if config["filter_contigs"]:

ruleorder: align_reads_to_prefilter_contigs > align_reads_to_final_contigs
@@ -570,9 +565,10 @@ if config["filter_contigs"]:
minl={params.minl} \
trim={params.trim} \
-Xmx{resources.java_mem}G 2> {log}"""
# HACK: this makes two copies of the same file


# HACK: this makes two copies of the same file


else: # no filter

8 changes: 5 additions & 3 deletions workflow/rules/binning.smk
@@ -137,11 +137,13 @@ rule get_metabat_depth_file:
"{sample}/binning/metabat/metabat.log",
conda:
"../envs/metabat.yaml"
threads: config["threads"] # multithreaded trough OMP_NUM_THREADS
threads: config["threads"] # multithreaded trough OMP_NUM_THREADS
resources:
mem_mb=config["mem"]*1000,
mem_mb=config["mem"] * 1000,
params:
minid = lambda wc, input: config["cobinning_readmapping_id"] *100 if len(input.bams)>1 else 97
minid=lambda wc, input: config["cobinning_readmapping_id"] * 100
if len(input.bams) > 1
else 97,
shell:
"jgi_summarize_bam_contig_depths "
" --percentIdentity {params.minid} "
37 changes: 19 additions & 18 deletions workflow/rules/cobinning.smk
@@ -30,14 +30,14 @@ rule filter_contigs:


def get_samples_of_bingroup(wildcards):

samples_of_group= sampleTable.query(f'BinGroup=="{wildcards.bingroup}"').index.tolist()
samples_of_group = sampleTable.query(
f'BinGroup=="{wildcards.bingroup}"'
).index.tolist()

return samples_of_group


def get_filtered_contigs_of_bingroup(wildcards):

samples_of_group = get_samples_of_bingroup(wildcards)

if len(samples_of_group) < 5:
@@ -51,7 +51,6 @@ def get_filtered_contigs_of_bingroup(wildcards):


def get_bams_of_bingroup(wildcards):

samples_of_group = get_samples_of_bingroup(wildcards)

return expand(
Expand All @@ -63,7 +62,7 @@ def get_bams_of_bingroup(wildcards):

rule combine_contigs:
input:
fasta= get_filtered_contigs_of_bingroup,
fasta=get_filtered_contigs_of_bingroup,
output:
"Intermediate/cobinning/{bingroup}/combined_contigs.fasta.gz",
log:
@@ -80,9 +79,9 @@ rule combine_contigs:
with gz.open(input_fasta, "rb") as fin:
for line in fin:
# if line is a header add sample name
if line[0] == ord('>'):
if line[0] == ord(">"):
line = f">{sample}{params.seperator}".encode() + line[1:]
# write each line to the combined file
# write each line to the combined file
fout.write(line)


@@ -95,7 +94,7 @@ rule minimap_index:
index_size="12G",
resources:
mem=config["mem"], # limited num of fatnodes (>200g)
threads: config["simplejob_threads"],
threads: config["simplejob_threads"]
log:
"logs/cobinning/{bingroup}/minimap_index.log",
benchmark:
@@ -142,7 +141,9 @@ rule minimap:
shell:
"""minimap2 -t {threads} -ax sr {input.mmi} {input.fq} | grep -v "^@" | cat {input.dict} - | samtools view -F 3584 -b - > {output.bam} 2>{log}"""

# samtools filters out secondary alignments

# samtools filters out secondary alignments


ruleorder: sort_bam > minimap

@@ -156,8 +157,8 @@ rule sort_bam:
prefix="Intermediate/cobinning/{bingroup}/bams/tmp.{sample}",
threads: 2
resources:
mem_mb=config["simplejob_mem"] *1000,
time_min=int(config["runtime"]["simplejob"]*60),
mem_mb=config["simplejob_mem"] * 1000,
time_min=int(config["runtime"]["simplejob"] * 60),
log:
"logs/cobinning/{bingroup}/mapping/sortbam/{sample}.log",
conda:
@@ -175,14 +176,14 @@ rule summarize_bam_contig_depths:
"logs/cobinning/{bingroup}/combine_coverage.log",
conda:
"../envs/metabat.yaml"
threads: config["threads"] # multithreaded trough OMP_NUM_THREADS
threads: config["threads"] # multithreaded trough OMP_NUM_THREADS
benchmark:
"logs/benchmarks/cobinning/{bingroup}/summarize_bam_contig_depths.tsv"
resources:
mem_mb=config["mem"]*1000,
time_min = config["runtime"]["long"]*60
mem_mb=config["mem"] * 1000,
time_min=config["runtime"]["long"] * 60,
params:
minid = config["cobinning_readmapping_id"] *100
minid=config["cobinning_readmapping_id"] * 100,
shell:
"jgi_summarize_bam_contig_depths "
" --percentIdentity {params.minid} "
@@ -218,8 +219,8 @@ rule run_vamb:
"../envs/vamb.yaml"
threads: config["threads"]
resources:
mem_mb=config["mem"]*1000,
time_min=config["runtime"]["long"]*60,
mem_mb=config["mem"] * 1000,
time_min=config["runtime"]["long"] * 60,
log:
"logs/cobinning/run_vamb/{bingroup}.log",
benchmark:
@@ -258,7 +259,7 @@ rule parse_vamb_output:
fasta_extension=".fna",
output_path=lambda wc: vamb_cluster_attribution_path, # path with {sample} to replace
samples=SAMPLES,
bingroups = sampleTable.BinGroup.unique()
bingroups=sampleTable.BinGroup.unique(),
conda:
"../envs/fasta.yaml"
script:
1 change: 0 additions & 1 deletion workflow/rules/derep.smk
@@ -42,7 +42,6 @@ rule skani_2_parquet:
threads: 1
run:
try:

skani_column_dtypes = {
"Ref_file": "category",
"Query_file": "category",
11 changes: 2 additions & 9 deletions workflow/rules/genecatalog.smk
@@ -50,7 +50,6 @@ if config["genecatalog"]["source"] == "contigs":
cat_files(input.fna, output.fna)
cat_files(input.short, output.short)


else:

localrules:
@@ -175,15 +174,15 @@ if (config["genecatalog"]["clustermethod"] == "linclust") or (
"logs/Genecatalog/clustering/generate_orf_info.log",
script:
"../scripts/generate_orf_info.py"
# cluster genes with cd-hit-est


# cluster genes with cd-hit-est


elif config["genecatalog"]["clustermethod"] == "cd-hit-est":

include: "cdhit.smk"


else:
raise Exception(
"Didn't understood the genecatalog clustermethod: {}".format(
@@ -388,7 +387,6 @@ rule combine_gene_coverages:
old_subset_folder = Path("Genecatalog/subsets/genes")
new_subset_folder = "Intermediate/genecatalog/subsets"
if old_subset_folder.exists():

logger.info(f"I move {old_subset_folder} to {new_subset_folder}")

import shutil
@@ -416,7 +414,6 @@ checkpoint gene_subsets:


def get_subset_names(wildcards):

dir_for_subsets = Path(checkpoints.gene_subsets.get(**wildcards).output[0])
subset_names = glob_wildcards(str(dir_for_subsets / "{subset}.faa")).subset

@@ -518,7 +515,6 @@ rule combine_egg_nogg_annotations:
time=config["runtime"]["default"],
run:
try:

import pandas as pd

Tables = [
Expand All @@ -538,7 +534,6 @@ rule combine_egg_nogg_annotations:

combined.to_parquet(output[0], index=False)
except Exception as e:

import traceback

with open(log[0], "w") as logfile:
@@ -566,7 +561,6 @@ rule convert_eggNOG_tsv2parquet:
df.to_parquet(output[0], index=False)

except Exception as e:

import traceback

with open(log[0], "w") as logfile:
@@ -618,7 +612,6 @@ rule DRAM_annotate_genecatalog:


def combine_genecatalog_dram_input(wildcards):

all_subsets = get_subset_names(wildcards)

return expand(
4 changes: 1 addition & 3 deletions workflow/rules/genomes.smk
@@ -90,7 +90,7 @@ rule get_contig2genomes:
if ext == ".gz":
bin_name = os.path.splitext(bin_name)[0]

# write names of contigs in mapping file
# write names of contigs in mapping file
with open(fasta) as f:
for line in f:
if line[0] == ">":
@@ -226,7 +226,6 @@ if config["genome_aligner"] == "minimap":
wrapper:
"v1.19.0/bio/minimap2/aligner"


elif config["genome_aligner"] == "bwa":

rule index_genomes:
@@ -262,7 +261,6 @@ elif config["genome_aligner"] == "bwa":
wrapper:
"v1.19.0/bio/bwa-mem2/mem"


else:
raise Exception(
"'genome_aligner' not understood, it should be 'minimap' or 'bwa', got '{genome_aligner}'. check config file".format(