Skip to content

Commit

Permalink
Allow choosing preferred mappings
Browse files: browse the repository at this point in the history
  • Loading branch information
bgyori committed Jun 18, 2024
1 parent 0d5ac1b commit 601791b
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
13 changes: 12 additions & 1 deletion indra/databases/hgnc_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,10 @@ def get_hgnc_name_from_mgi_name(mgi_name: str) -> Union[str, None]:
def _read_hgnc_maps():
hgnc_file = get_resource_path("hgnc_entries.tsv")
csv_rows = read_unicode_csv(hgnc_file, delimiter='\t', encoding='utf-8')
hgnc_uniprot_preferred = get_resource_path("hgnc_uniprot_preferred.csv")
csv_rows_uniprot_preferred = \
read_unicode_csv(hgnc_uniprot_preferred, delimiter=',',
encoding='utf-8')
hgnc_names = {}
hgnc_ids = {}
hgnc_withdrawn = []
Expand Down Expand Up @@ -515,19 +519,26 @@ def _read_hgnc_maps():
for old_id, new_id in hgnc_withdrawn_new_ids.items():
hgnc_names[old_id] = hgnc_names[new_id]

uniprot_ids_preferred = {}
for row in csv_rows_uniprot_preferred:
hgnc_id = row[0]
uniprot_id = row[1]
uniprot_ids_preferred[hgnc_id] = uniprot_id

return (
hgnc_names, hgnc_ids, hgnc_withdrawn,
uniprot_ids, entrez_ids, entrez_ids_reverse, mouse_map, rat_map,
prev_sym_map, ensembl_ids, ensembl_ids_reverse, gene_types,
dict(hgnc_to_enzymes), dict(enzyme_to_hgncs),
uniprot_ids_preferred
)


(
hgnc_names, hgnc_ids, hgnc_withdrawn, uniprot_ids, entrez_ids,
entrez_ids_reverse, mouse_map, rat_map, prev_sym_map, ensembl_ids,
ensembl_ids_reverse, gene_type,
hgnc_to_enzymes, enzyme_to_hgncs,
hgnc_to_enzymes, enzyme_to_hgncs, uniprot_ids_preferred
) = _read_hgnc_maps()


Expand Down
12 changes: 8 additions & 4 deletions indra/ontology/bio/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class BioOntology(IndraOntology):
# should be incremented to "force" rebuilding the ontology to be consistent
# with the underlying resource files.
name = 'bio'
version = '1.33'
version = '1.34'
ontology_namespaces = [
'go', 'efo', 'hp', 'doid', 'chebi', 'ido', 'mondo', 'eccode',
]
Expand Down Expand Up @@ -147,11 +147,15 @@ def add_hgnc_uniprot_entrez_xrefs(self):
from indra.databases import hgnc_client
from indra.databases import uniprot_client
edges = []
for hid, uid in hgnc_client.uniprot_ids.items():
uids = uid.split(', ')
for hid, upid in hgnc_client.uniprot_ids.items():
uids = upid.split(', ')
preferred = hgnc_client.uniprot_ids_preferred.get(hid)
if preferred:
uids = [preferred]
for uid in uids:
edge_data = {'type': 'xref', 'source': 'hgnc'}
edges.append((self.label('HGNC', hid), self.label('UP', uid),
{'type': 'xref', 'source': 'hgnc'}))
edge_data))
self.add_edges_from(edges)

edges = [(self.label('UP', uid), self.label('HGNC', hid),
Expand Down

0 comments on commit 601791b

Please sign in to comment.