-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAffinityCommunities.py
127 lines (103 loc) · 4.62 KB
/
AffinityCommunities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python
# Make a nexus file of the trees in each affinity community. Make consensus tree of each affinity comunity
# Usage AffinityCommunities.py 'Path/To/Treeset' Model Plateau
# ****The TreeScaper developers have changed what number the indices start from several times. The may start counting at 0 or 1. Make sure all the indices used
# in this script are right (for communities and trees). Adjust the lines searching [&U] for unrooted trees or [&R] for rooted trees according to which type of trees you are using
#Output files
# AffinityCom[number].nex: a nexus file of the trees in an affinity community
# AffinityCom[number].nex.con: consensus tree of an affinity community
# AffinityCom[number].nex.con.pdf: pdf of consensus tree of an affinity community
# AffinityCommunitiesTreeCount.txt: a file to count the frequency and relative frequency of trees in each affinity community
from __future__ import division
import re
import os
import sys
import fnmatch
from shutil import move
from tempfile import mkstemp
def reg_ex_match(file, pattern):
#returns the first match of a reg ex search
file.seek(0)
for line in file:
m = pattern.match(line)
if m:
return m.group(1)
def edit_treeset(treeFileEditPath):
#add comment blocks that number each tree with the indices used by TreeScaper
print treeFileEditPath
treeFileEdit = open(treeFileEditPath, 'r')
lineNum = 1
#make a temp file
fh, absPath = mkstemp()
tempFile = open(absPath,'w')
for line in treeFileEdit:
if line.find('[&U]') != -1: #Need to have this in every line with a tree! This will be [&U] for unrooted trees and [&R] for rooted
tempFile.write(line.replace('=','['+str(lineNum)+']='))
lineNum += 1
elif line.find('[&R]') != -1: #Need to have this in every line with a tree! This will be [&U] for unrooted trees and [&R] for rooted
tempFile.write(line.replace('=','['+str(lineNum)+']='))
lineNum += 1
else:
tempFile.write(line)
#close temp file
tempFile.close()
os.close(fh)
treeFileEdit.close()
#Remove original file
os.remove(treeFileEditPath)
#Move new file
move(absPath, treeFileEditPath)
return lineNum
def affinityCommunityConsensus(treeFile,model,plateau,rooted):
if model not in ('CPM','ERNM','CNM','NNM'):
print "invalid model choose CPM, ERNM, CNM, or NNM"
model = raw_input('Enter Model: ')
print plateau
#outputs community structure for current plateau value
os.system("/home/vestige/Documents/BrownLabJash/Programs/bin/treescaper_scripts_2017/CLVTreeScaper -trees "+\
"-f %s -ft Trees -w 0 -r %s -o Community -t Affinity -dm URF -am Exp -cm %s -lm manu -lp %s -ln 0" % (treeFile, rooted, model, plateau)+\
" > Affinity%s_%s_community.out" % (model, plateau))
#getting number of communities from the output files
comFile = open('Affinity%s_%s_community.out' % (model, plateau) , 'r' )
pattern = re.compile('Number of communities: (\d+)')
coms = int(reg_ex_match(comFile, pattern))
print
print coms
totalTrees = edit_treeset(treeFile)
treeFile = open(treeFile,'r')
#make a file to count the frequency and relative frequency of trees in each community
treeCountFile = open("AffinityCommunitiesTreeCount.txt", 'w')
#get the tree indices from each community
for i in range(1, coms+1):
pattern = re.compile('Community '+str(i)+' includes nodes: (.+)')
comStr = reg_ex_match(comFile, pattern)
comLs = comStr.split(",")
print comLs
comLs = filter(None, comLs)
#Pull out these trees from the original trees. Make a nexus file containing the trees for each community
comTreeSetStr = 'AffinityCom'+str(i)+'.nex'
comTreeSet = open(comTreeSetStr,'w')
treeCount = 0
treeFile.seek(0)
for line in treeFile:
if line.find('[&U]') == -1 & line.find('[&R]') == -1:
comTreeSet.write(line)
else:
for j in comLs:
if line.find('['+str(j)+']') != -1:
comTreeSet.write(line)
treeCount += 1
#Write frequency and relative frequency of trees in the community
treeCountFile.write("%s\t%s\t%.2f%% of trees\n" % (comTreeSetStr, str(treeCount),100*(treeCount/totalTrees)))
comTreeSet.close()
#Make a consensus tree of the affinity community
comTreeConStr = comTreeSetStr + ".con"
if rooted == '0':
os.system("sumtrees.py -r --unrooted -o %s %s" % (comTreeConStr,comTreeSetStr))
if rooted == '1':
os.system("sumtrees.py -r --rooted -o %s %s" % (comTreeConStr,comTreeSetStr))
os.system("cat ~/Documents/BrownLabJash/Programs/bin/SeqSim/FigTreeBlock.txt >> %s" % (comTreeConStr))
#Make a pdf of the consensus tree
os.system("figtree -graphic PDF %s %s.pdf" % (comTreeConStr, comTreeConStr))
if __name__=='__main__':
affinityCommunityConsensus(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4])