-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path02inferTs.py
56 lines (41 loc) · 1.24 KB
/
02inferTs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# USAGE: python 02inferTs.py infile genomeLength nDiploids
import tsinfer
import sys
import numpy as np
import logging
log = logging.getLogger('logger')
log.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(message)s')
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
log.addHandler(ch)
infi = sys.argv[1]
log.info("Infile is %s" % infi)
ll = int(sys.argv[2])
log.info("Genome size set ot %d" % ll)
nInd = int(sys.argv[3])
log.info("Expecting %d diploid individuals" % nInd)
log.info("Adding site data to samples object")
samp = tsinfer.SampleData(sequence_length=ll)
for i in range(nInd):
samp.add_individual(ploidy=2)
c = 0
last=-1
with open(infi) as f:
for line in f:
c+=1
if c % 50000 == 0: log.info("Processed %d variant sites." % c)
if c > 6:
a = line.strip().split(" ")
if int(float(a[0])*ll) != last:
samp.add_site(float(a[0])*ll, np.array(a[2:], dtype=np.int16))
last=int(float(a[0])*ll)
samp.finalise()
log.info("Running inference...")
ts = tsinfer.infer(samp)
log.info("Writing TS and VCF...")
ts.dump(infi + ".inf.ts")
with open(infi + ".inf.vcf", "w") as f:
ts.write_vcf(f)
log.info("Done.")