forked from jcame/WholeGenomeAssembly
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSingle_Genome_Assembly_script_23.08.2021.sh
99 lines (43 loc) · 1.89 KB
/
Single_Genome_Assembly_script_23.08.2021.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
## Single-genome assembly pipeline: (i) Illumina-only and (ii) Illumina + ONT
## hybrid SPAdes assemblies, followed by a BLAST comparison of the two.
##
## Run from the directory that holds the inputs:
##   - exactly one file matching *R1* and one matching *R2* (paired Illumina reads)
##   - NexteraPE-PE.fa (adapter file given to ILLUMINACLIP)
##   - the phiX174 reference FASTA (see PHIX_REF)
##   - the Nanopore reads (see ONT_READS)
##
## Optional environment overrides (defaults reproduce the original settings):
##   THREADS        threads for trimmomatic / seqkit / spades / blastn   (4)
##   SPADES_MEM_GB  value passed to spades.py -m                          (7)
##   PHIX_REF       phiX reference; if unset, phiX/phi_X174_phage.fa is used
##                  when present, otherwise ./phi_X174_phage.fa
##   ONT_READS      Nanopore FASTQ                                        (BC10.fastq)

# Abort on the first failing step or unset variable so that later steps never
# run (and never delete files) on top of a broken intermediate. The script
# contains no pipelines, so `pipefail` is not needed and this stays POSIX sh.
set -eu

die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

threads=${THREADS:-4}
spades_mem_gb=${SPADES_MEM_GB:-7}
ont_reads=${ONT_READS:-BC10.fastq}
ont_base=${ont_reads##*/}
ont_hq="${ont_base%.*}_HQ.fq"   # BC10.fastq -> BC10_HQ.fq

# Both bbduk.sh calls must use the same reference; pick it once.
phix_ref=${PHIX_REF:-}
if [ -z "$phix_ref" ]; then
  for candidate in phiX/phi_X174_phage.fa phi_X174_phage.fa; do
    if [ -f "$candidate" ]; then
      phix_ref=$candidate
      break
    fi
  done
fi
[ -n "$phix_ref" ] || die "phiX reference not found (looked for phiX/phi_X174_phage.fa and phi_X174_phage.fa); set PHIX_REF"

# Resolve the Illumina read files; more than one match per glob would shift
# trimmomatic's positional arguments.
set -- *R1*
if [ "$#" -ne 1 ] || [ ! -f "$1" ]; then
  die "expected exactly one forward-read file matching *R1*"
fi
reads_fwd=$1
set -- *R2*
if [ "$#" -ne 1 ] || [ ! -f "$1" ]; then
  die "expected exactly one reverse-read file matching *R2*"
fi
reads_rev=$1

## Quality Control
trimmomatic PE -threads "$threads" -phred33 "$reads_fwd" "$reads_rev" \
  PF1.fq UF1.fq PF2.fq UF2.fq \
  ILLUMINACLIP:NexteraPE-PE.fa:2:30:10 LEADING:20 TRAILING:20 MINLEN:50
# Paired and unpaired survivors are pooled per direction; pairing is restored
# later by fastq_pair.
cat PF1.fq UF1.fq > forward.fq
cat PF2.fq UF2.fq > reverse.fq
rm PF1.fq UF1.fq PF2.fq UF2.fq

## Dereplication (by sequence)
seqkit rmdup forward.fq -s -o DPF1.fq -j "$threads"
seqkit rmdup reverse.fq -s -o DPF2.fq -j "$threads"
rm forward.fq reverse.fq

## Decontamination (reads matching the phiX reference go nowhere; out= keeps the rest)
bbduk.sh in=DPF1.fq out=forward.fq ref="$phix_ref" k=31 hdist=1
bbduk.sh in=DPF2.fq out=reverse.fq ref="$phix_ref" k=31 hdist=1
rm DPF1.fq DPF2.fq

## Pairing sequences (paired & unpaired)
fastq_pair forward.fq reverse.fq
rm forward.fq reverse.fq
# Name the singleton files explicitly so stray *single.fq files in the
# directory are neither merged in nor deleted.
cat forward.fq.single.fq reverse.fq.single.fq > unpaired.fq
rm forward.fq.single.fq reverse.fq.single.fq

## Denovo Assembly using (i) Illumina
spades.py --pe1-1 forward.fq.paired.fq --pe1-2 reverse.fq.paired.fq --pe1-s unpaired.fq \
  -o spades_folder_illumina -t "$threads" -m "$spades_mem_gb" --only-assembler

## Selecting long scaffolds (i) Illumina
seqkit seq spades_folder_illumina/scaffolds.fasta -m 2000 -g > scaffolds_2k_illumina.fasta

## results (i) Illumina
# grep exits non-zero when nothing matches; that is informational here, not fatal.
grep ">" scaffolds_2k_illumina.fasta \
  || printf 'warning: no scaffolds listed from scaffolds_2k_illumina.fasta\n' >&2

## Quality Control for ONT reads
NanoFilt -q 7 -l 500 --readtype 1D < "$ont_reads" > "$ont_hq"

## Denovo Assembly using (ii) Illumina hybrid ONT
spades.py --pe1-1 forward.fq.paired.fq --pe1-2 reverse.fq.paired.fq --pe1-s unpaired.fq \
  -o spades_folder_hybrid -t "$threads" -m "$spades_mem_gb" --only-assembler \
  --nanopore "$ont_hq"

## Selecting long scaffolds (ii) Illumina hybrid ONT
seqkit seq spades_folder_hybrid/scaffolds.fasta -m 2000 -g > scaffolds_2k_hybrid.fasta

## results (ii) Illumina hybrid ONT
grep ">" scaffolds_2k_hybrid.fasta \
  || printf 'warning: no scaffolds listed from scaffolds_2k_hybrid.fasta\n' >&2

## Cleaning
# Remove only the intermediates created above; a blanket `rm *fq` would also
# delete any raw input whose name happens to end in "fq".
rm -f forward.fq.paired.fq reverse.fq.paired.fq unpaired.fq "$ont_hq"

## Comparing Assemblies
makeblastdb -in scaffolds_2k_hybrid.fasta -dbtype nucl
# NOTE(review): 10e-5 equals 1e-4; confirm whether 1e-5 was intended.
blastn -query scaffolds_2k_illumina.fasta -db scaffolds_2k_hybrid.fasta -outfmt 6 \
  -evalue 10e-5 -num_threads "$threads" -strand both -subject_besthit > OutSelfBlast.txt