This repository has been archived by the owner on Dec 6, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile
128 lines (91 loc) · 2.97 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
configfile: "config.yaml"
import os,sys
scripts_dir= os.path.join(os.path.dirname(os.path.abspath(workflow.snakefile)),"scripts")
sys.path.append(scripts_dir)
Files_to_import={
"genomes/checkm/completeness.tsv": "Results/genome_completeness.tsv",
'genomes/counts/raw_counts_genomes.tsv': 'Results/counts/raw_counts_genomes.tsv',
"genomes/counts/median_coverage_genomes.tsv": "Results/counts/median_coverage_genomes.tsv",
"genomes/annotations/KO.tsv": "Results/annotations/KO.tsv" ,
"genomes/annotations/CAZy.tsv": "Results/annotations/CAZy.tsv",
}
rule all:
input:
"Results/Summary.html"
onsuccess:
print(f"Analysis finished. Have a look at the {os.path.abspath('.')}/Results/Summary.html")
localrules: analyze, get_taxonomy, import_files
rule import_files:
input:
Files_to_import.keys()
output:
Files_to_import.values()
run:
import shutil
for file in input:
shutil.copy(file,Files_to_import[file])
rule get_taxonomy:
input:
"genomes/taxonomy/gtdb/gtdbtk.bac120.summary.tsv"
output:
"Results/taxonomy.tsv"
script:
"scripts/get_taxonomy.py"
# rule copy_scripts:
# input:
# f"{scripts_dir}/utils",
# f"{scripts_dir}/Analyis_genome_abundances.ipynb"
# output:
# directory("Results/scripts"),
#
#
# run:
# os.makedirs(output[0])
#
# import shutil
# for f in input:
# shutil.copy(f, output[0])
rule analyze:
input:
"Results/taxonomy.tsv",
"Results/mapping_rate.tsv",
Files_to_import.values(),
expand("Results/annotations/{category}.tsv",category=['KO','CAZy']),
log:
# optional path to the processed notebook
notebook="Results/Code.ipynb"
notebook:
"scripts/Analyis_genome_abundances.ipynb"
rule convert_nb:
input:
"Results/Code.ipynb"
output:
"Results/Summary.html"
shell:
"jupyter nbconvert --output Summary --to=html --TemplateExporter.exclude_input=True {input}"
rule get_mapping_rate:
input:
read_counts= "stats/read_counts.tsv",
mapped='genomes/counts/raw_counts_genomes.tsv'
output:
"Results/mapping_rate.tsv"
run:
import pandas as pd
Nreads= pd.read_table(input.read_counts, index_col=[0,1])
Counts= pd.read_csv(input.mapped,index_col=0,sep='\t').T
mapping_rate = Counts.sum(1)/( 2* Nreads.Total_Reads.unstack().QC)
mapping_rate.name='Mapping_rate'
mapping_rate.to_csv(output[0],sep='\t',header=True)
rule get_annotations:
input:
"genomes/annotations/gene2genome.tsv.gz",
"Genecatalog/annotations/eggNog.tsv.gz"
output:
expand("genomes/annotations/{category}.tsv",category=['KO','CAZy']),
expand("Genecatalog/annotations/{category}.tsv",category=['KO','CAZy']),
resources:
mem= config['mem']
threads:
1
script:
"scripts/get_annoatations.py"