-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrosalind.py
64 lines (54 loc) · 1.14 KB
/
rosalind.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import collections
import io
import itertools
def parse_fasta(fasta_file):
    """Yield the sequence of each record in a FASTA-formatted line iterable.

    Args:
        fasta_file: An iterable of text lines, such as an open text file,
            an ``io.StringIO``, or a list of strings.

    Yields:
        One string per record: the record's sequence lines, each stripped
        of surrounding whitespace, joined together. Lines starting with
        ``>`` only delimit records; their text is discarded. Records with
        no sequence data are skipped, so empty or header-only input yields
        nothing.
    """
    chunks = []
    for line in fasta_file:
        # startswith() is safe on an empty string, unlike indexing line[0].
        if line.startswith(">"):
            if chunks:
                yield "".join(chunks)
                chunks = []
        else:
            stripped = line.strip()
            if stripped:
                chunks.append(stripped)
    # Flush the last record, but only when it actually holds sequence data.
    if chunks:
        yield "".join(chunks)
# Sample FASTA text: three records, each sequence on a single line.
fasta = (
    ">Rosalind_1\n"
    "GATTACA\n"
    ">Rosalind_2\n"
    "TAGACCA\n"
    ">Rosalind_3\n"
    "ATACA\n"
)
# Sample FASTA text: the same three records, each sequence wrapped over
# two lines.
fasta2 = (
    ">Rosalind_1\n"
    "GAT\n"
    "TACA\n"
    ">Rosalind_2\n"
    "TAG\n"
    "ACCA\n"
    ">Rosalind_3\n"
    "ATA\n"
    "CA\n"
)
def test_parse_fasta():
    """parse_fasta returns each single-line record's sequence, in order."""
    records = [*parse_fasta(io.StringIO(fasta))]
    assert records == ["GATTACA", "TAGACCA", "ATACA"]
def test_parse_fasta2():
    """parse_fasta joins a record's wrapped sequence lines into one string."""
    records = [*parse_fasta(io.StringIO(fasta2))]
    assert records == ["GATTACA", "TAGACCA", "ATACA"]
def sliding_window(iterable, n):
    """Yield overlapping windows of length n over iterable, as tuples.

    Example:
        sliding_window('ABCDEFG', 4) --> ABCD BCDE CDEF DEFG

    Args:
        iterable: Any iterable; it is consumed lazily, one item per window.
        n: Window length; expected to be a positive integer.

    Yields:
        Tuples of n consecutive items. Nothing is yielded when the input
        has fewer than n items.
    """
    items = iter(iterable)
    # Pre-fill with the first n - 1 items so that the first append below
    # completes the first full window.
    window = collections.deque(itertools.islice(items, n - 1), maxlen=n)
    for item in items:
        # maxlen=n makes the deque drop its oldest item on each append.
        window.append(item)
        yield tuple(window)