Skip to content

Commit

Permalink
accommodate gap characters, closes #38
Browse files (browse the repository at this point in the history)
  • Loading branch information
wsdewitt committed May 26, 2019
1 parent 55ae488 commit 06d5942
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions bin/phylip_parse.py
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ def parse_seqdict(fh, mode='dnaml'):
if mode == 'dnaml':
patterns = re.compile("^\s*(?P<id>[a-zA-Z0-9>_.-]*)\s+(?P<seq>[a-zA-Z \-]+)")
elif mode == 'dnapars':
- patterns = re.compile("^\s*\S+\s+(?P<id>[a-zA-Z0-9>_.-]*)\s+(yes\s+|no\s+|maybe\s+)?(?P<seq>[a-zA-Z \-]+)")
+ patterns = re.compile("^\s*\S+\s+(?P<id>[a-zA-Z0-9>_.-]*)\s+(yes\s+|no\s+|maybe\s+)?(?P<seq>[a-zA-Z \-\?]+)")
else:
raise ValueError('invalid mode '+mode)
fh.next()
@@ -115,11 +115,12 @@ def parse_outfile(outfile, countfile=None, naive='naive'):

def disambiguate(tree):
'''make random choices for ambiguous bases, respecting tree inheritance'''
+ ambiguous_dna_values['?'] = 'GATC-'
sequence_length = len(tree.sequence)
for node in tree.traverse():
for site in range(sequence_length):
base = node.sequence[site]
- if base not in 'ACGT':
+ if base not in 'ACGT-':
new_base = random.choice(ambiguous_dna_values[base])
for node2 in node.traverse(is_leaf_fn=lambda n: False if base in [n2.sequence[site] for n2 in n.children] else True):
if node2.sequence[site] == base:
Expand Down

0 comments on commit 06d5942

Please sign in to comment.