-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunReference.sh
71 lines (46 loc) · 1.78 KB
/
runReference.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env bash
# Build a reference phylogeny from the proteomes stored in reference/:
# run BUSCO on each proteome, gather the shared single-copy genes, align and
# trim them, concatenate the alignments and infer a tree with IQ-TREE.
#
# One-off environment setup:
# mamba create -n phyloeuk -c bioconda -c conda-forge trimal mamba mafft busco=5 fasttree perl-bioperl perl-file-slurp bioawk iqtree

# Activation failure is only reported: the tools may already be on PATH.
source activate phyloeuk ||
  echo "warning: could not activate the 'phyloeuk' environment" >&2

# All following steps use paths relative to reference/. Stop here rather than
# run the pipeline (and write ../listReference.txt) from the wrong directory.
cd reference || exit 1

# Input proteomes. An unmatched glob stays literal, hence the -e test.
proteomes=(*faa)
if [[ ! -e "${proteomes[0]}" ]]; then
  echo "error: no *faa files found in $(pwd)" >&2
  exit 1
fi

# One proteome file name per line; passed to concatenateFastaReference.pl
# at the end of the script.
printf '%s\n' "${proteomes[@]}" > ../listReference.txt

# BUSCO in protein mode against the eukaryota lineage, 8 CPUs per run.
# Results for X.faa are written to X_busco/.
for proteome in "${proteomes[@]}"; do
  busco -m prot -c 8 -i "$proteome" -o "${proteome%.faa}_busco" -l eukaryota
done
# Pool every per-proteome BUSCO result table into a single file. Redirecting
# one cat truncates any table left over from an earlier run, so no separate
# rm (which errors on a first run) is needed.
busco_tables=(*_busco/run_eukaryota_odb10/full_table.tsv)
if [[ ! -e "${busco_tables[0]}" ]]; then
  echo "error: no *_busco/run_eukaryota_odb10/full_table.tsv found in $(pwd)" >&2
  exit 1
fi
cat -- "${busco_tables[@]}" > full_table_all.tsv

# Keep the BUSCO ids whose status is Complete in strictly more than
# BUSCO_MIN_COUNT pooled rows (default 30), written sorted, one id per line.
# The table is read as tab-separated (column 1 = id, column 2 = status) and
# the counting happens inside awk, so the threshold is always compared with
# the numeric count. With whitespace field splitting after `uniq -c`, the
# leading tab is dropped and the comparison silently lands on the id instead.
awk -F'\t' -v min="${BUSCO_MIN_COUNT:-30}" '
  $2 ~ /Complete/ { hits[$1]++ }
  END { for (id in hits) if (hits[id] > min) print id }
' full_table_all.tsv | sort > ../listGenes.tsv

# An empty list makes every later step a no-op; say so instead of failing
# obscurely further down.
if [[ ! -s ../listGenes.tsv ]]; then
  echo "warning: no BUSCO id is Complete in more than ${BUSCO_MIN_COUNT:-30} tables; ../listGenes.tsv is empty" >&2
fi
# Create one directory per selected gene: BuscoGenes/<id>.faa/.
# -p keeps a re-run from failing on directories that already exist.
mkdir -p BuscoGenes
while IFS= read -r gene_id || [[ -n "$gene_id" ]]; do
  [[ -n "$gene_id" ]] || continue # ignore blank lines
  mkdir -p "BuscoGenes/${gene_id}.faa"
done < ../listGenes.tsv

# Copy each proteome's single-copy BUSCO sequence <id>.faa to
# BuscoGenes/<id>.faa/<proteome>_<id>.faa. Genes that were not selected have
# no target directory and are skipped explicitly, so genuine cp errors are
# reported instead of being discarded along with the expected ones.
for busco_run in *busco; do
  [[ -d "$busco_run" ]] || continue
  seq_dir="${busco_run}/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences"
  proteome="${busco_run%_busco}"
  for seq_path in "$seq_dir"/*faa; do
    [[ -e "$seq_path" ]] || continue # unmatched glob stays literal
    seq_file="${seq_path##*/}"
    [[ -d "BuscoGenes/${seq_file}" ]] || continue # gene not in listGenes.tsv
    cp -- "$seq_path" "BuscoGenes/${seq_file}/${proteome}_${seq_file}"
  done
done
# The remaining per-gene steps run inside BuscoGenes/; stop if it is missing
# rather than rewrite files in the wrong directory.
cd BuscoGenes || exit 1

# For every collected sequence file <gene>.faa/<name>.faa, write
# <gene>.faa/<name>_rename.faa in which each '>' is followed by the source
# file name (<name>.faa), tagging every FASTA header with its origin.
# perl is run directly rather than through a generated helper script, and the
# tag is handed over in the environment, so file names are never re-parsed by
# a shell or interpolated into the Perl substitution.
for gene_dir in *.faa; do
  [[ -d "$gene_dir" ]] || continue
  for seq_path in "$gene_dir"/*; do
    [[ -f "$seq_path" ]] || continue
    # Outputs of an earlier run must not be renamed again, otherwise they
    # yield *_rename_rename.faa and duplicate sequences in the concatenation.
    [[ "$seq_path" == *_rename.faa ]] && continue
    RENAME_TAG="${seq_path#"$gene_dir"/}" \
      perl -pe 's/>/>$ENV{RENAME_TAG}/g' "$seq_path" \
      > "${seq_path%.faa}_rename.faa"
  done
done
# Merge the renamed sequences of each gene into one multi-FASTA file,
# concatGenes/<gene>.faa. -p keeps a re-run from failing on mkdir.
mkdir -p concatGenes
for gene_dir in *faa; do
  [[ -d "$gene_dir" ]] || continue
  cat -- "$gene_dir"/*rename.faa > "concatGenes/${gene_dir}"
done

# Alignment happens inside concatGenes/. A failed cd would otherwise run
# mafft on the gene directories and leave the later `cd ../../..` outside the
# project tree.
cd concatGenes || exit 1
mkdir -p align

# Align each gene with MAFFT (automatic strategy, 8 threads):
# <gene>.faa -> align/<gene>_align.faa
for gene_fasta in *faa; do
  [[ -f "$gene_fasta" ]] || continue
  mafft --auto --quiet --thread 8 "$gene_fasta" > "align/${gene_fasta%.faa}_align.faa"
done

# Trim each alignment with trimAl: -gt 0.5 is the gap threshold and -cons 25
# the minimum percentage of original columns to keep.
# align/<gene>_align.faa -> align/<gene>_align_trim.faa
for alignment in align/*align.faa; do
  [[ -f "$alignment" ]] || continue
  trimal -in "$alignment" -out "${alignment%.faa}_trim.faa" -gt 0.5 -cons 25
done
# Back to the directory the script was started from (three levels up), where
# scripts/ and listReference.txt live.
cd ../../.. || exit 1

# Concatenate the per-gene alignments, passing the alignment directory and
# the list of reference proteomes to the helper script.
perl scripts/concatenateFastaReference.pl reference/BuscoGenes/concatGenes/align listReference.txt

# The steps below read Reference_align.fasta from the current directory;
# without it the tree search cannot succeed, so stop with a clear message.
if [[ ! -s Reference_align.fasta ]]; then
  echo "error: Reference_align.fasta is missing or empty" >&2
  exit 1
fi

## Check to see if all the sequences have the same size
# Prints the distinct sequence lengths; a single line means they all agree.
seq_lengths=$(bioawk -c fastx '{ print $name, length($seq) }' < Reference_align.fasta | cut -f2 | sort | uniq)
if [[ -n "$seq_lengths" ]]; then
  printf '%s\n' "$seq_lengths"
fi
if [[ "$seq_lengths" == *$'\n'* ]]; then
  echo "warning: sequences in Reference_align.fasta differ in length" >&2
fi

## Make phylogenetic tree
# Automatic thread count, 1000 SH-aLRT replicates, 1000 ultrafast bootstrap
# replicates, model chosen by -m TEST.
iqtree2 -T AUTO --alrt 1000 -B 1000 -s Reference_align.fasta -m TEST