-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathHPO.py
122 lines (110 loc) · 4.14 KB
/
HPO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
'''
HPO (Human Phenotype Ontology) term objects, including analyses such as similarity between HPO terms and similarity between groups of HPO terms.
hp.obo can be downloaded from: http://www.obofoundry.org/ontology/hp.html
ALL_SOURCES_ALL_FREQUENCIES_phenotype_to_genes.txt.gz can be downloaded from https://github.com/moonso/phizz/blob/master/phizz/resources/ALL_SOURCES_ALL_FREQUENCIES_phenotype_to_genes.txt.gz
'''
import requests
import json
import os
import time
import Genes
from CommonFuncs import *
from collections import defaultdict
from sqlite_utils import *
import pandas # for reading csv and dumping to sqlite
def _initiate_db(db_conn):
    '''
    Make sure the `hpo` table exists on `db_conn`.

    The table is only created when absent (CREATE TABLE IF NOT EXISTS), so
    calling this repeatedly on the same connection is harmless. The change
    is committed before returning.
    '''
    create_stmt = (
        'CREATE TABLE IF NOT EXISTS hpo\n'
        '(id text PRIMARY KEY UNIQUE, name text, alt_id text, '
        'parents text, ancestors text, genes text)'
    )
    cursor = db_conn.cursor()
    cursor.execute(create_stmt)
    db_conn.commit()
def _flatten_array_of_arrays(arrs):
    '''
    Flatten one level of nesting.

    Returns a new list holding the items of every iterable in `arrs`, in
    their original order. Only the outermost level is flattened; items that
    are themselves lists are kept as they are.
    '''
    # A single nested comprehension builds the result directly, instead of
    # running a comprehension only for its side effects.
    return [item for arr in arrs for item in arr]
def _fetch_one(self, field):
    '''
    Return the value stored in column `field` of the hpo row whose id is
    `self._id`, looked up through `self.db_conn`.

    Raises ValueError when there is no such row, or when the column value
    is NULL.
    '''
    db_c = self.db_conn.cursor()
    db_c.execute('SELECT * FROM hpo WHERE id=?', (self._id,))
    row = db_c.fetchone()
    # Deal with a missing row before handing it to dict_factory, so that an
    # unknown id is always reported as ValueError, however dict_factory
    # happens to treat a None row.
    if row is None:
        raise ValueError('no %s can be retrieved' % field)
    db_hpo = dict_factory(db_c, row)
    if db_hpo is None or db_hpo[field] is None:
        raise ValueError('no %s can be retrieved' % field)
    return db_hpo[field]
class Hpo:
    '''
    A single HPO term backed by the `hpo` table reachable through a
    database connection.

    `self.id` is the id exactly as given by the caller; `self._id` is the
    primary id it resolved to (the caller may pass an alternative id), or
    None when no id was given. The `name`, `alt_ids`, `parents`,
    `ancestors` and `genes` properties are read from the database on first
    access and then cached on the instance.
    '''

    def __init__(self, db_conn, id=None):
        '''
        Bind the term to `db_conn`, creating and populating the hpo table
        first when necessary.

        Raises ValueError when `id` is given but matches neither an id nor
        an alt_id in the hpo table.
        '''
        _initiate_db(db_conn)
        self.db_conn = db_conn
        self._check_db()
        self.id = id
        # Always define _id, so that an instance created without an id does
        # not fail with AttributeError as soon as a property reads it.
        self._id = None
        if not id:
            return
        db_c = db_conn.cursor()
        db_c.execute('SELECT id FROM hpo WHERE id=?', (id,))
        if db_c.fetchone():
            self._id = id
            return
        # not a primary id: look into alt_id
        db_c.execute("SELECT id FROM hpo WHERE alt_id LIKE ?", ('%' + id + '%',))
        data = db_c.fetchone()
        if not data:
            msg = '%s cannot be recognised' % id
            raise ValueError(msg)
        self._id = data[0]

    def _find_ancestors(self, id, anc, data):
        '''
        construct_db helper function, to find all ancestors of a node.

        `data` maps a node id to a dict whose 'is_a' entry lists the ids of
        its direct parents. Every ancestor of `id` that is not yet in `anc`
        is appended to `anc`; the same list object is returned.
        '''
        is_a = data[id]['is_a']
        if not is_a:
            return anc
        # Only parents not recorded yet are added and walked: this keeps
        # the result free of duplicates when several paths lead to the same
        # ancestor, and avoids walking a shared lineage more than once.
        unseen = []
        for parent in is_a:
            if parent not in anc and parent not in unseen:
                unseen.append(parent)
        anc.extend(unseen)
        # Walk up from every parent, not just the first one, otherwise
        # ancestors reachable only through the other parents are lost.
        for parent in unseen:
            self._find_ancestors(parent, anc, data)
        return anc

    def _check_db(self):
        # hpo has been constructed? if not, construct it. if yes, do nothing
        db_c = self.db_conn.cursor()
        db_c.execute('SELECT * FROM hpo WHERE id=?', ('HP:0000001',))
        # the presence of the HP:0000001 row is used as the marker of a
        # populated table
        if db_c.fetchone() is None:
            self.construct_db()

    def construct_db(self):
        # construct hpo database using the data/hpo.csv file that sits next
        # to this module; any existing hpo table is replaced
        csvfile = os.path.join(os.path.dirname(__file__), 'data', 'hpo.csv')
        df = pandas.read_csv(csvfile)
        df.to_sql('hpo', self.db_conn, if_exists='replace', index=False)

    def _cached_json_field(self, cache_attr, field):
        # return the JSON-decoded value of db column `field`, reading it
        # from the database only while nothing is cached under `cache_attr`
        if getattr(self, cache_attr, None) is None:
            setattr(self, cache_attr, json.loads(_fetch_one(self, field)))
        return getattr(self, cache_attr)

    @property
    def name(self):
        # name column of this term (cached after the first read)
        if getattr(self, '_name', None) is None:
            self._name = _fetch_one(self, 'name')
        return self._name

    @property
    def alt_ids(self):
        # JSON-decoded alt_id column of this term
        return self._cached_json_field('_alt_ids', 'alt_id')

    @property
    def parents(self):
        # JSON-decoded parents column of this term
        return self._cached_json_field('_parents', 'parents')

    @property
    def ancestors(self):
        # JSON-decoded ancestors column of this term
        return self._cached_json_field('_ancestors', 'ancestors')

    @property
    def genes(self):
        # JSON-decoded genes column of this term
        return self._cached_json_field('_genes', 'genes')

    def names_to_ids(self, names):
        # translate an array of names to a dictionary of name => id
        db_c = self.db_conn.cursor()
        result = batch_query(db_c, 'HPO', names, pointer='name')
        data = {}
        for row in result:
            record = dict_factory(db_c, row)
            data[record['name']] = record['id']
        return data