Commit: formating
SilasK committed Oct 6, 2023
1 parent 3b804fa commit 9739e59
Showing 16 changed files with 50 additions and 76 deletions.
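All of the changes below are pure formatting: comment spacing, spacing around keyword arguments, trailing commas, and blank-line normalization, in the style produced by the black formatter (and snakefmt for the Snakemake files). A sketch of how such a formatting pass might be reproduced locally, assuming those two tools are in use (neither is named in the commit itself):

    # Hypothetical commands; assumes black and snakefmt are installed.
    black atlas/          # format the Python package
    snakefmt workflow/    # format the Snakefile and *.smk rule files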
4 changes: 2 additions & 2 deletions atlas/atlas.py
@@ -9,7 +9,7 @@

from snakemake.io import load_configfile
from .make_config import validate_config
from .init.atlas_init import run_init#, run_init_sra
from .init.atlas_init import run_init # , run_init_sra

from .__init__ import __version__

@@ -66,7 +66,7 @@ def cli(obj):
cli.add_command(run_init)


#cli.add_command(run_init_sra)
# cli.add_command(run_init_sra)


def get_snakefile(file="workflow/Snakefile"):
1 change: 0 additions & 1 deletion atlas/init/atlas_init.py
@@ -62,7 +62,6 @@ def prepare_sample_table_for_atlas(

sample_table["BinGroup"] = "All"


validate_sample_table(sample_table)

sample_table.to_csv(outfile, sep="\t")
2 changes: 0 additions & 2 deletions atlas/make_config.py
@@ -236,8 +236,6 @@ def make_config(
)




def validate_config(config, workflow):
conf = load_configfile(config)

7 changes: 3 additions & 4 deletions workflow/Snakefile
@@ -40,7 +40,7 @@ wildcard_constraints:
include: "rules/sample_table.smk"
include: "rules/download.smk" # contains hard coded variables
include: "rules/qc.smk"
include: "rules/screen.smk" # expects function get_input_fastq defined in qc
include: "rules/screen.smk" # expects function get_input_fastq defined in qc
include: "rules/assemble.smk"
include: "rules/binning.smk"
include: "rules/derep.smk"
@@ -201,9 +201,10 @@ rule qc:
output:
touch("finished_QC"),


rule screen:
input:
"QC/screen/sketch_comparison.tsv.gz"
"QC/screen/sketch_comparison.tsv.gz",


# overwrite commands in rules/download.snakefile
@@ -222,14 +223,12 @@ onerror:

for r in workflow.rules:
if not "mem_mb" in r.resources:

if "mem" in r.resources:
r.resources["mem_mb"] = r.resources["mem"] * 1000
else:
# default
r.resources["mem_mb"] = config["mem"] * 1000


# add time if not present. Simple jobs use simple time

if "time_min" not in r.resources:
14 changes: 5 additions & 9 deletions workflow/rules/assemble.smk
@@ -70,7 +70,6 @@ if SKIP_QC & (len(MULTIFILE_FRACTIONS) < 3):
-Xmx{resources.java_mem}G 2> {log}
"""


else:

localrules:
@@ -97,7 +96,6 @@ else:




#
rule normalize_reads:
input:
@@ -273,7 +271,6 @@ if config.get("assembler", "megahit") == "megahit":
shell:
"cat {input} > {output}"


def megahit_input_parsing(input):
Nfiles = len(input)

@@ -350,7 +347,6 @@ if config.get("assembler", "megahit") == "megahit":
shell:
"cp {input} {output}"


else:
if PAIRED_END:
ASSEMBLY_FRACTIONS = ["R1", "R2"]
@@ -465,16 +461,17 @@ else:
temp("{sample}/assembly/{sample}_raw_contigs.fasta"),
shell:
"cp {input} {output}"
# standardizes header labels within contig FASTAs


# standardizes header labels within contig FASTAs


rule rename_contigs:
input:
"{sample}/assembly/{sample}_raw_contigs.fasta",
output:
fasta="{sample}/assembly/{sample}_prefilter_contigs.fasta",
mapping_table = "{sample}/assembly/old2new_contig_names.tsv"
mapping_table="{sample}/assembly/old2new_contig_names.tsv",
threads: config.get("simplejob_threads", 1)
resources:
mem=config["simplejob_mem"],
@@ -489,8 +486,6 @@
"../scripts/rename_assembly.py"




if config["filter_contigs"]:

ruleorder: align_reads_to_prefilter_contigs > align_reads_to_final_contigs
@@ -570,9 +565,10 @@ if config["filter_contigs"]:
minl={params.minl} \
trim={params.trim} \
-Xmx{resources.java_mem}G 2> {log}"""
# HACK: this makes two copies of the same file


# HACK: this makes two copies of the same file


else: # no filter

8 changes: 5 additions & 3 deletions workflow/rules/binning.smk
@@ -137,11 +137,13 @@ rule get_metabat_depth_file:
"{sample}/binning/metabat/metabat.log",
conda:
"../envs/metabat.yaml"
threads: config["threads"] # multithreaded trough OMP_NUM_THREADS
threads: config["threads"] # multithreaded trough OMP_NUM_THREADS
resources:
mem_mb=config["mem"]*1000,
mem_mb=config["mem"] * 1000,
params:
minid = lambda wc, input: config["cobinning_readmapping_id"] *100 if len(input.bams)>1 else 97
minid=lambda wc, input: config["cobinning_readmapping_id"] * 100
if len(input.bams) > 1
else 97,
shell:
"jgi_summarize_bam_contig_depths "
" --percentIdentity {params.minid} "
37 changes: 19 additions & 18 deletions workflow/rules/cobinning.smk
@@ -30,14 +30,14 @@ rule filter_contigs:


def get_samples_of_bingroup(wildcards):

samples_of_group= sampleTable.query(f'BinGroup=="{wildcards.bingroup}"').index.tolist()
samples_of_group = sampleTable.query(
f'BinGroup=="{wildcards.bingroup}"'
).index.tolist()

return samples_of_group


def get_filtered_contigs_of_bingroup(wildcards):

samples_of_group = get_samples_of_bingroup(wildcards)

if len(samples_of_group) < 5:
@@ -51,7 +51,6 @@ def get_filtered_contigs_of_bingroup(wildcards):


def get_bams_of_bingroup(wildcards):

samples_of_group = get_samples_of_bingroup(wildcards)

return expand(
Expand All @@ -63,7 +62,7 @@ def get_bams_of_bingroup(wildcards):

rule combine_contigs:
input:
fasta= get_filtered_contigs_of_bingroup,
fasta=get_filtered_contigs_of_bingroup,
output:
"Intermediate/cobinning/{bingroup}/combined_contigs.fasta.gz",
log:
@@ -80,9 +79,9 @@ rule combine_contigs:
with gz.open(input_fasta, "rb") as fin:
for line in fin:
# if line is a header add sample name
if line[0] == ord('>'):
if line[0] == ord(">"):
line = f">{sample}{params.seperator}".encode() + line[1:]
# write each line to the combined file
# write each line to the combined file
fout.write(line)


@@ -95,7 +94,7 @@ rule minimap_index:
index_size="12G",
resources:
mem=config["mem"], # limited num of fatnodes (>200g)
threads: config["simplejob_threads"],
threads: config["simplejob_threads"]
log:
"logs/cobinning/{bingroup}/minimap_index.log",
benchmark:
@@ -142,7 +141,9 @@ rule minimap:
shell:
"""minimap2 -t {threads} -ax sr {input.mmi} {input.fq} | grep -v "^@" | cat {input.dict} - | samtools view -F 3584 -b - > {output.bam} 2>{log}"""

# samtools filters out secondary alignments

# samtools filters out secondary alignments


ruleorder: sort_bam > minimap

@@ -156,8 +157,8 @@ rule sort_bam:
prefix="Intermediate/cobinning/{bingroup}/bams/tmp.{sample}",
threads: 2
resources:
mem_mb=config["simplejob_mem"] *1000,
time_min=int(config["runtime"]["simplejob"]*60),
mem_mb=config["simplejob_mem"] * 1000,
time_min=int(config["runtime"]["simplejob"] * 60),
log:
"logs/cobinning/{bingroup}/mapping/sortbam/{sample}.log",
conda:
@@ -175,14 +176,14 @@ rule summarize_bam_contig_depths:
"logs/cobinning/{bingroup}/combine_coverage.log",
conda:
"../envs/metabat.yaml"
threads: config["threads"] # multithreaded trough OMP_NUM_THREADS
threads: config["threads"] # multithreaded trough OMP_NUM_THREADS
benchmark:
"logs/benchmarks/cobinning/{bingroup}/summarize_bam_contig_depths.tsv"
resources:
mem_mb=config["mem"]*1000,
time_min = config["runtime"]["long"]*60
mem_mb=config["mem"] * 1000,
time_min=config["runtime"]["long"] * 60,
params:
minid = config["cobinning_readmapping_id"] *100
minid=config["cobinning_readmapping_id"] * 100,
shell:
"jgi_summarize_bam_contig_depths "
" --percentIdentity {params.minid} "
@@ -218,8 +219,8 @@ rule run_vamb:
"../envs/vamb.yaml"
threads: config["threads"]
resources:
mem_mb=config["mem"]*1000,
time_min=config["runtime"]["long"]*60,
mem_mb=config["mem"] * 1000,
time_min=config["runtime"]["long"] * 60,
log:
"logs/cobinning/run_vamb/{bingroup}.log",
benchmark:
@@ -258,7 +259,7 @@ rule parse_vamb_output:
fasta_extension=".fna",
output_path=lambda wc: vamb_cluster_attribution_path, # path with {sample} to replace
samples=SAMPLES,
bingroups = sampleTable.BinGroup.unique()
bingroups=sampleTable.BinGroup.unique(),
conda:
"../envs/fasta.yaml"
script:
1 change: 0 additions & 1 deletion workflow/rules/derep.smk
@@ -42,7 +42,6 @@ rule skani_2_parquet:
threads: 1
run:
try:

skani_column_dtypes = {
"Ref_file": "category",
"Query_file": "category",
11 changes: 2 additions & 9 deletions workflow/rules/genecatalog.smk
@@ -50,7 +50,6 @@ if config["genecatalog"]["source"] == "contigs":
cat_files(input.fna, output.fna)
cat_files(input.short, output.short)


else:

localrules:
@@ -175,15 +174,15 @@ if (config["genecatalog"]["clustermethod"] == "linclust") or (
"logs/Genecatalog/clustering/generate_orf_info.log",
script:
"../scripts/generate_orf_info.py"
# cluster genes with cd-hit-est


# cluster genes with cd-hit-est


elif config["genecatalog"]["clustermethod"] == "cd-hit-est":

include: "cdhit.smk"


else:
raise Exception(
"Didn't understood the genecatalog clustermethod: {}".format(
@@ -388,7 +387,6 @@ rule combine_gene_coverages:
old_subset_folder = Path("Genecatalog/subsets/genes")
new_subset_folder = "Intermediate/genecatalog/subsets"
if old_subset_folder.exists():

logger.info(f"I move {old_subset_folder} to {new_subset_folder}")

import shutil
@@ -416,7 +414,6 @@ checkpoint gene_subsets:


def get_subset_names(wildcards):

dir_for_subsets = Path(checkpoints.gene_subsets.get(**wildcards).output[0])
subset_names = glob_wildcards(str(dir_for_subsets / "{subset}.faa")).subset

@@ -518,7 +515,6 @@ rule combine_egg_nogg_annotations:
time=config["runtime"]["default"],
run:
try:

import pandas as pd

Tables = [
Expand All @@ -538,7 +534,6 @@ rule combine_egg_nogg_annotations:

combined.to_parquet(output[0], index=False)
except Exception as e:

import traceback

with open(log[0], "w") as logfile:
@@ -566,7 +561,6 @@ rule convert_eggNOG_tsv2parquet:
df.to_parquet(output[0], index=False)

except Exception as e:

import traceback

with open(log[0], "w") as logfile:
@@ -618,7 +612,6 @@ rule DRAM_annotate_genecatalog:


def combine_genecatalog_dram_input(wildcards):

all_subsets = get_subset_names(wildcards)

return expand(
4 changes: 1 addition & 3 deletions workflow/rules/genomes.smk
@@ -90,7 +90,7 @@ rule get_contig2genomes:
if ext == ".gz":
bin_name = os.path.splitext(bin_name)[0]

# write names of contigs in mapping file
# write names of contigs in mapping file
with open(fasta) as f:
for line in f:
if line[0] == ">":
@@ -226,7 +226,6 @@ if config["genome_aligner"] == "minimap":
wrapper:
"v1.19.0/bio/minimap2/aligner"


elif config["genome_aligner"] == "bwa":

rule index_genomes:
@@ -262,7 +261,6 @@ elif config["genome_aligner"] == "bwa":
wrapper:
"v1.19.0/bio/bwa-mem2/mem"


else:
raise Exception(
"'genome_aligner' not understood, it should be 'minimap' or 'bwa', got '{genome_aligner}'. check config file".format(