From 259eef893d7f2c075a4e137fae76f9e5dbf007ff Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 4 May 2022 18:51:40 -0400 Subject: [PATCH 1/4] feat: add regulatory_approval extension to therapy descriptors --- metakb/database.py | 9 +++++ metakb/normalizers.py | 68 +++++++++++++++++++++++++++++++++-- metakb/query.py | 10 +++++- metakb/transform/civic.py | 8 +++-- metakb/transform/moa.py | 5 ++- tests/conftest.py | 76 ++++++++++++++++++++++++++++++++++++++- 6 files changed, 168 insertions(+), 8 deletions(-) diff --git a/metakb/database.py b/metakb/database.py index b70ba308..6093bbee 100644 --- a/metakb/database.py +++ b/metakb/database.py @@ -203,6 +203,15 @@ def _add_descriptor(tx, descriptor: Dict, added_ids: Set[str]): 'description', 'xrefs', 'alternate_labels')) + if descr_type == 'TherapyDescriptor': + # handle extensions field in therapy descriptor + extensions = descriptor.get('extensions', []) + for ext in extensions: + name = ext['name'] + if name == 'regulatory_approval': + descriptor[name] = json.dumps(ext['value']) + descr_keys += f", {name}:${name}" + query = f''' MERGE (descr:{descr_type} {{ {descr_keys} }}) MERGE (value:{value_type} {{ id:${value_id} }}) diff --git a/metakb/normalizers.py b/metakb/normalizers.py index 150f00cb..5e8e8eef 100644 --- a/metakb/normalizers.py +++ b/metakb/normalizers.py @@ -1,11 +1,11 @@ """Module for VICC normalizers.""" -from typing import Optional, Tuple +from typing import List, Optional, Tuple from ga4gh.vrsatile.pydantic.vrs_models import VRSTypes -from ga4gh.vrsatile.pydantic.vrsatile_models import VariationDescriptor +from ga4gh.vrsatile.pydantic.vrsatile_models import VariationDescriptor, Extension from variation.query import QueryHandler as VariationQueryHandler from therapy.query import QueryHandler as TherapyQueryHandler -from therapy.schemas import NormalizationService as NormalizedTherapy +from therapy.schemas import NormalizationService as NormalizedTherapy, ApprovalRating from disease.query import 
QueryHandler as DiseaseQueryHandler from disease.schemas import NormalizationService as NormalizedDisease from gene.query import QueryHandler as GeneQueryHandler @@ -133,3 +133,65 @@ def normalize_therapy(self, queries)\ if highest_match == 100: break return therapy_norm_resp, normalized_therapy_id + + @staticmethod + def get_regulatory_approval_extension(therapy_norm_resp: NormalizedTherapy) -> List: + """Given therapy normalization service response, extract out the regulatory + approval extension + + :param NormalizedTherapy therapy_norm_resp: Response from normalizing therapy + :return: List containing regulatory approval extension if it exists + """ + therapy_norm_resp = therapy_norm_resp.dict() + tn_resp_exts = therapy_norm_resp.get("therapy_descriptor", {}).get("extensions") + tn_resp_exts = tn_resp_exts if tn_resp_exts else [] + regulatory_approval_extension = list() + + for ext in tn_resp_exts: + if ext["name"] == "regulatory_approval": + ext_value = ext["value"] + approval_ratings = ext_value.get("approval_ratings", []) + matched_ext_value = None + + if any(ar in [ApprovalRating.FDA_PRESCRIPTION, ApprovalRating.FDA_OTC] + for ar in approval_ratings): + matched_ext_value = "FDA" + if ApprovalRating.FDA_DISCONTINUED in approval_ratings: + if ApprovalRating.CHEMBL_4 not in approval_ratings: + matched_ext_value = None + elif ApprovalRating.CHEMBL_4 in approval_ratings: + matched_ext_value = "chembl_phase_4" + + if matched_ext_value: + has_indications = ext_value.get("has_indication", []) + matched_indications = list() + + for indication in has_indications: + indication_exts = indication.get("extensions", []) + for indication_ext in indication_exts: + if indication_ext["value"] == matched_ext_value: + matched_indications.append({ + "id": indication["id"], + "type": indication["type"], + "label": indication["label"], + "disease_id": indication["disease_id"] + }) + + if matched_ext_value == "FDA": + approval_rating = "FDA" + else: + approval_rating = "ChEMBL" + + 
regulatory_approval_extension.append( + Extension( + name="regulatory_approval", + value={ + "approval_rating": approval_rating, + "has_indications": matched_indications + } + ) + ) + + break + + return regulatory_approval_extension diff --git a/metakb/query.py b/metakb/query.py index db1b8a9d..ae2e9dd5 100644 --- a/metakb/query.py +++ b/metakb/query.py @@ -836,9 +836,17 @@ def _get_therapy_descriptor( "label": therapy_descriptor.get("label"), "therapy_id": None, "alternate_labels": therapy_descriptor.get("alternate_labels"), - "xrefs": therapy_descriptor.get("xrefs") + "xrefs": therapy_descriptor.get("xrefs"), + "extensions": [] } + key = "regulatory_approval" + val = therapy_descriptor.get(key) + if val: + td_params["extensions"].append(Extension(name=key, value=json.loads(val))) + else: + del td_params["extensions"] + with self.driver.session() as session: value_object = session.read_transaction( self._find_descriptor_value_object, td_params["id"] diff --git a/metakb/transform/civic.py b/metakb/transform/civic.py index 98a235d2..964a1ac1 100644 --- a/metakb/transform/civic.py +++ b/metakb/transform/civic.py @@ -679,7 +679,7 @@ def _get_therapy_descriptor(self, drug) \ ncit_id = f"ncit:{drug['ncit_id']}" queries = [ncit_id, label] - _, normalized_therapy_id = \ + therapy_norm_resp, normalized_therapy_id = \ self.vicc_normalizers.normalize_therapy(queries) if not normalized_therapy_id: @@ -687,13 +687,17 @@ def _get_therapy_descriptor(self, drug) \ f"using queries {ncit_id} and {label}") return None + regulatory_approval_extension = \ + self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) # noqa: E501 + therapy_descriptor = ValueObjectDescriptor( id=therapy_id, type="TherapyDescriptor", label=label, therapy_id=normalized_therapy_id, alternate_labels=drug['aliases'], - xrefs=[ncit_id] + xrefs=[ncit_id], + extensions=regulatory_approval_extension if regulatory_approval_extension else None # noqa: E501 ).dict(exclude_none=True) return 
therapy_descriptor diff --git a/metakb/transform/moa.py b/metakb/transform/moa.py index b0fd38a8..dcca545a 100644 --- a/metakb/transform/moa.py +++ b/metakb/transform/moa.py @@ -381,12 +381,15 @@ def _get_therapy_descriptors(self, assertion): return [] if normalized_therapy_id: + regulatory_approval_extension = \ + self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) # noqa: E501 therapy_descriptor = ValueObjectDescriptor( id=f"{schemas.SourceName.MOA.value}." f"{therapy_norm_resp.therapy_descriptor.id}", type="TherapyDescriptor", label=label, - therapy_id=normalized_therapy_id + therapy_id=normalized_therapy_id, + extensions=regulatory_approval_extension if regulatory_approval_extension else None # noqa: E501 ).dict(exclude_none=True) else: return [] diff --git a/tests/conftest.py b/tests/conftest.py index 5a500898..15210b7e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -173,6 +173,29 @@ def civic_tid146(): ], "xrefs": [ "ncit:C66940" + ], + "extensions": [ + { + "type": "Extension", + "name": "regulatory_approval", + "value": { + "approval_rating": "FDA", + "has_indications": [ + { + "id": "hemonc:25316", + "type": "DiseaseDescriptor", + "label": "Non-small cell lung cancer Squamous", + "disease_id": None + }, + { + "id": "hemonc:642", + "type": "DiseaseDescriptor", + "label": "Non-small cell lung cancer", + "disease_id": "ncit:C2926" + } + ] + } + } ] } @@ -1192,7 +1215,58 @@ def moa_imatinib(): "id": "moa.normalize.therapy:Imatinib", "type": "TherapyDescriptor", "label": "Imatinib", - "therapy_id": "rxcui:282388" + "therapy_id": "rxcui:282388", + "extensions": [{ + "type": "Extension", + "name": "regulatory_approval", + "value": { + "approval_rating": "FDA", + "has_indications": [ + { + "id": "hemonc:634", + "type": "DiseaseDescriptor", + "label": "Myelodysplastic syndrome", + "disease_id": "ncit:C3247" + }, + { + "id": "hemonc:616", + "type": "DiseaseDescriptor", + "label": "Hypereosinophilic syndrome", + "disease_id": 
"ncit:C27038" + }, + { + "id": "hemonc:582", + "type": "DiseaseDescriptor", + "label": "Chronic myelogenous leukemia", + "disease_id": "ncit:C3174" + }, + { + "id": "hemonc:669", + "type": "DiseaseDescriptor", + "label": "Systemic mastocytosis", + "disease_id": "ncit:C9235" + }, + { + "id": "hemonc:24309", + "type": "DiseaseDescriptor", + "label": "Acute lymphoblastic leukemia", + "disease_id": "ncit:C3167" + }, + { + "id": "hemonc:667", + "type": "DiseaseDescriptor", + "label": "Soft tissue sarcoma", + "disease_id": "ncit:C9306" + }, + { + "id": "hemonc:602", + "type": "DiseaseDescriptor", + "label": "Gastrointestinal stromal tumor", + "disease_id": "ncit:C3868" + } + ] + } + }] } From b734e8b60208d752aeb41a1f5feab2fae206490d Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 4 May 2022 20:36:36 -0400 Subject: [PATCH 2/4] refactor: clean up getting regulatory approval extension --- metakb/normalizers.py | 99 ++++++++++++++++++--------------------- metakb/transform/civic.py | 4 +- metakb/transform/moa.py | 2 +- 3 files changed, 48 insertions(+), 57 deletions(-) diff --git a/metakb/normalizers.py b/metakb/normalizers.py index 5e8e8eef..e77794f6 100644 --- a/metakb/normalizers.py +++ b/metakb/normalizers.py @@ -1,5 +1,5 @@ """Module for VICC normalizers.""" -from typing import List, Optional, Tuple +from typing import Optional, Tuple from ga4gh.vrsatile.pydantic.vrs_models import VRSTypes from ga4gh.vrsatile.pydantic.vrsatile_models import VariationDescriptor, Extension @@ -135,63 +135,54 @@ def normalize_therapy(self, queries)\ return therapy_norm_resp, normalized_therapy_id @staticmethod - def get_regulatory_approval_extension(therapy_norm_resp: NormalizedTherapy) -> List: + def get_regulatory_approval_extension( + therapy_norm_resp: NormalizedTherapy + ) -> Optional[Extension]: """Given therapy normalization service response, extract out the regulatory approval extension :param NormalizedTherapy therapy_norm_resp: Response from normalizing therapy - :return: List 
containing regulatory approval extension if it exists + :return: Extension containing transformed regulatory approval and indication + data if the `regulatory_approval` extension exists in therapy normalizer """ - therapy_norm_resp = therapy_norm_resp.dict() - tn_resp_exts = therapy_norm_resp.get("therapy_descriptor", {}).get("extensions") - tn_resp_exts = tn_resp_exts if tn_resp_exts else [] - regulatory_approval_extension = list() - - for ext in tn_resp_exts: - if ext["name"] == "regulatory_approval": - ext_value = ext["value"] - approval_ratings = ext_value.get("approval_ratings", []) - matched_ext_value = None - - if any(ar in [ApprovalRating.FDA_PRESCRIPTION, ApprovalRating.FDA_OTC] - for ar in approval_ratings): - matched_ext_value = "FDA" - if ApprovalRating.FDA_DISCONTINUED in approval_ratings: - if ApprovalRating.CHEMBL_4 not in approval_ratings: - matched_ext_value = None - elif ApprovalRating.CHEMBL_4 in approval_ratings: - matched_ext_value = "chembl_phase_4" - - if matched_ext_value: - has_indications = ext_value.get("has_indication", []) - matched_indications = list() - - for indication in has_indications: - indication_exts = indication.get("extensions", []) - for indication_ext in indication_exts: - if indication_ext["value"] == matched_ext_value: - matched_indications.append({ - "id": indication["id"], - "type": indication["type"], - "label": indication["label"], - "disease_id": indication["disease_id"] - }) - - if matched_ext_value == "FDA": - approval_rating = "FDA" - else: - approval_rating = "ChEMBL" - - regulatory_approval_extension.append( - Extension( - name="regulatory_approval", - value={ - "approval_rating": approval_rating, - "has_indications": matched_indications - } - ) - ) - - break + regulatory_approval_extension = None + tn_resp_exts = therapy_norm_resp.dict().get("therapy_descriptor", {}).get("extensions") or [] # noqa: E501 + tn_ext = [v for v in tn_resp_exts if v["name"] == "regulatory_approval"] + + if tn_ext: + ext_value = 
tn_ext[0]["value"] + approval_ratings = ext_value.get("approval_ratings", []) + matched_ext_value = None + + if any(ar in {ApprovalRating.FDA_PRESCRIPTION, ApprovalRating.FDA_OTC} + for ar in approval_ratings): + matched_ext_value = "FDA" + if ApprovalRating.FDA_DISCONTINUED in approval_ratings: + if ApprovalRating.CHEMBL_4 not in approval_ratings: + matched_ext_value = None + elif ApprovalRating.CHEMBL_4 in approval_ratings: + matched_ext_value = "chembl_phase_4" + + if matched_ext_value: + has_indications = ext_value.get("has_indication", []) + matched_indications = list() + + for indication in has_indications: + indication_exts = indication.get("extensions", []) + for indication_ext in indication_exts: + if indication_ext["value"] == matched_ext_value: + matched_indications.append({ + "id": indication["id"], + "type": indication["type"], + "label": indication["label"], + "disease_id": indication["disease_id"] + }) + + regulatory_approval_extension = Extension( + name="regulatory_approval", + value={ + "approval_rating": "FDA" if matched_ext_value == "FDA" else "ChEMBL", # noqa: E501 + "has_indications": matched_indications + }) return regulatory_approval_extension diff --git a/metakb/transform/civic.py b/metakb/transform/civic.py index 964a1ac1..a882e546 100644 --- a/metakb/transform/civic.py +++ b/metakb/transform/civic.py @@ -688,7 +688,7 @@ def _get_therapy_descriptor(self, drug) \ return None regulatory_approval_extension = \ - self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) # noqa: E501 + self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) therapy_descriptor = ValueObjectDescriptor( id=therapy_id, @@ -697,7 +697,7 @@ def _get_therapy_descriptor(self, drug) \ therapy_id=normalized_therapy_id, alternate_labels=drug['aliases'], xrefs=[ncit_id], - extensions=regulatory_approval_extension if regulatory_approval_extension else None # noqa: E501 + extensions=[regulatory_approval_extension] if 
regulatory_approval_extension else None # noqa: E501 ).dict(exclude_none=True) return therapy_descriptor diff --git a/metakb/transform/moa.py b/metakb/transform/moa.py index dcca545a..622d0369 100644 --- a/metakb/transform/moa.py +++ b/metakb/transform/moa.py @@ -389,7 +389,7 @@ def _get_therapy_descriptors(self, assertion): type="TherapyDescriptor", label=label, therapy_id=normalized_therapy_id, - extensions=regulatory_approval_extension if regulatory_approval_extension else None # noqa: E501 + extensions=[regulatory_approval_extension] if regulatory_approval_extension else None # noqa: E501 ).dict(exclude_none=True) else: return [] From 8a1e6c591be024d3c070af9f49a3b43d6d4e13e9 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 4 May 2022 20:41:19 -0400 Subject: [PATCH 3/4] refactor: increment version + LAST_UPDATED --- metakb/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metakb/version.py b/metakb/version.py index e59846ec..a54c3dda 100644 --- a/metakb/version.py +++ b/metakb/version.py @@ -1,4 +1,4 @@ """MetaKB version""" # REQ: EACH TIME VERSION IS UPDATED, MUST ALSO UPDATE LAST_UPDATED -__version__ = "1.1.0-alpha.6" -LAST_UPDATED = "2022-04-07" +__version__ = "1.1.0-alpha.7" +LAST_UPDATED = "2022-05-04" From 932575732d8f6431a382c7c25899a104b6048768 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Fri, 6 May 2022 11:02:22 -0400 Subject: [PATCH 4/4] refactor: clean up regulatory_approval --- metakb/database.py | 2 +- metakb/normalizers.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/metakb/database.py b/metakb/database.py index 6093bbee..49096af2 100644 --- a/metakb/database.py +++ b/metakb/database.py @@ -204,7 +204,7 @@ def _add_descriptor(tx, descriptor: Dict, added_ids: Set[str]): 'alternate_labels')) if descr_type == 'TherapyDescriptor': - # handle extensions field in therapy descriptor + # capture regulatory_approval field in therapy descriptor extensions extensions = 
descriptor.get('extensions', []) for ext in extensions: name = ext['name'] diff --git a/metakb/normalizers.py b/metakb/normalizers.py index e77794f6..176bc414 100644 --- a/metakb/normalizers.py +++ b/metakb/normalizers.py @@ -156,10 +156,9 @@ def get_regulatory_approval_extension( if any(ar in {ApprovalRating.FDA_PRESCRIPTION, ApprovalRating.FDA_OTC} for ar in approval_ratings): - matched_ext_value = "FDA" - if ApprovalRating.FDA_DISCONTINUED in approval_ratings: - if ApprovalRating.CHEMBL_4 not in approval_ratings: - matched_ext_value = None + if ApprovalRating.FDA_DISCONTINUED not in approval_ratings or \ + ApprovalRating.CHEMBL_4 in approval_ratings: # noqa: E125 + matched_ext_value = "FDA" elif ApprovalRating.CHEMBL_4 in approval_ratings: matched_ext_value = "chembl_phase_4"