@staticmethod
def get_regulatory_approval_extension(
    therapy_norm_resp: "NormalizedTherapy"
) -> "Optional[Extension]":
    """Extract the regulatory approval extension from a therapy normalizer
    response.

    The normalizer's `regulatory_approval` extension is reduced to a single
    approval rating ("FDA" or "ChEMBL") and the indications whose own
    extensions carry the matching value.

    :param NormalizedTherapy therapy_norm_resp: Response from normalizing therapy
    :return: Extension containing the transformed regulatory approval and
        indication data, or `None` if the response has no
        `regulatory_approval` extension or no approval rating matched
    """
    # `.dict()` keeps keys whose value is None, so a `.get(key, default)`
    # default is not enough here: fall back with `or` instead.
    descriptor = therapy_norm_resp.dict().get("therapy_descriptor") or {}
    extensions = descriptor.get("extensions") or []

    # Only the first `regulatory_approval` extension is considered.
    ext_value = next(
        (ext.get("value") for ext in extensions
         if ext.get("name") == "regulatory_approval"),
        None
    )
    if not ext_value:
        return None

    approval_ratings = ext_value.get("approval_ratings") or []
    if not approval_ratings:
        return None

    fda_ratings = {ApprovalRating.FDA_PRESCRIPTION, ApprovalRating.FDA_OTC}
    has_chembl_4 = ApprovalRating.CHEMBL_4 in approval_ratings

    if any(rating in fda_ratings for rating in approval_ratings):
        # A discontinued FDA approval only counts when ChEMBL phase 4 is also
        # present. This branch never falls back to the ChEMBL-only rating.
        is_discontinued = ApprovalRating.FDA_DISCONTINUED in approval_ratings
        if is_discontinued and not has_chembl_4:
            return None
        matched_ext_value = "FDA"
    elif has_chembl_4:
        matched_ext_value = "chembl_phase_4"
    else:
        return None

    matched_indications = []
    for indication in ext_value.get("has_indication") or []:
        for indication_ext in indication.get("extensions") or []:
            if indication_ext.get("value") == matched_ext_value:
                matched_indications.append({
                    "id": indication["id"],
                    "type": indication["type"],
                    "label": indication["label"],
                    "disease_id": indication["disease_id"]
                })

    return Extension(
        name="regulatory_approval",
        value={
            "approval_rating": "FDA" if matched_ext_value == "FDA" else "ChEMBL",
            "has_indications": matched_indications
        })
therapy_descriptor.get("xrefs"), + "extensions": [] } + key = "regulatory_approval" + val = therapy_descriptor.get(key) + if val: + td_params["extensions"].append(Extension(name=key, value=json.loads(val))) + else: + del td_params["extensions"] + with self.driver.session() as session: value_object = session.read_transaction( self._find_descriptor_value_object, td_params["id"] diff --git a/metakb/transform/civic.py b/metakb/transform/civic.py index 98a235d2..a882e546 100644 --- a/metakb/transform/civic.py +++ b/metakb/transform/civic.py @@ -679,7 +679,7 @@ def _get_therapy_descriptor(self, drug) \ ncit_id = f"ncit:{drug['ncit_id']}" queries = [ncit_id, label] - _, normalized_therapy_id = \ + therapy_norm_resp, normalized_therapy_id = \ self.vicc_normalizers.normalize_therapy(queries) if not normalized_therapy_id: @@ -687,13 +687,17 @@ def _get_therapy_descriptor(self, drug) \ f"using queries {ncit_id} and {label}") return None + regulatory_approval_extension = \ + self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) + therapy_descriptor = ValueObjectDescriptor( id=therapy_id, type="TherapyDescriptor", label=label, therapy_id=normalized_therapy_id, alternate_labels=drug['aliases'], - xrefs=[ncit_id] + xrefs=[ncit_id], + extensions=[regulatory_approval_extension] if regulatory_approval_extension else None # noqa: E501 ).dict(exclude_none=True) return therapy_descriptor diff --git a/metakb/transform/moa.py b/metakb/transform/moa.py index b0fd38a8..622d0369 100644 --- a/metakb/transform/moa.py +++ b/metakb/transform/moa.py @@ -381,12 +381,15 @@ def _get_therapy_descriptors(self, assertion): return [] if normalized_therapy_id: + regulatory_approval_extension = \ + self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) # noqa: E501 therapy_descriptor = ValueObjectDescriptor( id=f"{schemas.SourceName.MOA.value}." 
f"{therapy_norm_resp.therapy_descriptor.id}", type="TherapyDescriptor", label=label, - therapy_id=normalized_therapy_id + therapy_id=normalized_therapy_id, + extensions=[regulatory_approval_extension] if regulatory_approval_extension else None # noqa: E501 ).dict(exclude_none=True) else: return [] diff --git a/metakb/version.py b/metakb/version.py index e59846ec..a54c3dda 100644 --- a/metakb/version.py +++ b/metakb/version.py @@ -1,4 +1,4 @@ """MetaKB version""" # REQ: EACH TIME VERSION IS UPDATED, MUST ALSO UPDATE LAST_UPDATED -__version__ = "1.1.0-alpha.6" -LAST_UPDATED = "2022-04-07" +__version__ = "1.1.0-alpha.7" +LAST_UPDATED = "2022-05-04" diff --git a/tests/conftest.py b/tests/conftest.py index 5a500898..15210b7e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -173,6 +173,29 @@ def civic_tid146(): ], "xrefs": [ "ncit:C66940" + ], + "extensions": [ + { + "type": "Extension", + "name": "regulatory_approval", + "value": { + "approval_rating": "FDA", + "has_indications": [ + { + "id": "hemonc:25316", + "type": "DiseaseDescriptor", + "label": "Non-small cell lung cancer Squamous", + "disease_id": None + }, + { + "id": "hemonc:642", + "type": "DiseaseDescriptor", + "label": "Non-small cell lung cancer", + "disease_id": "ncit:C2926" + } + ] + } + } ] } @@ -1192,7 +1215,58 @@ def moa_imatinib(): "id": "moa.normalize.therapy:Imatinib", "type": "TherapyDescriptor", "label": "Imatinib", - "therapy_id": "rxcui:282388" + "therapy_id": "rxcui:282388", + "extensions": [{ + "type": "Extension", + "name": "regulatory_approval", + "value": { + "approval_rating": "FDA", + "has_indications": [ + { + "id": "hemonc:634", + "type": "DiseaseDescriptor", + "label": "Myelodysplastic syndrome", + "disease_id": "ncit:C3247" + }, + { + "id": "hemonc:616", + "type": "DiseaseDescriptor", + "label": "Hypereosinophilic syndrome", + "disease_id": "ncit:C27038" + }, + { + "id": "hemonc:582", + "type": "DiseaseDescriptor", + "label": "Chronic myelogenous leukemia", + "disease_id": 
"ncit:C3174" + }, + { + "id": "hemonc:669", + "type": "DiseaseDescriptor", + "label": "Systemic mastocytosis", + "disease_id": "ncit:C9235" + }, + { + "id": "hemonc:24309", + "type": "DiseaseDescriptor", + "label": "Acute lymphoblastic leukemia", + "disease_id": "ncit:C3167" + }, + { + "id": "hemonc:667", + "type": "DiseaseDescriptor", + "label": "Soft tissue sarcoma", + "disease_id": "ncit:C9306" + }, + { + "id": "hemonc:602", + "type": "DiseaseDescriptor", + "label": "Gastrointestinal stromal tumor", + "disease_id": "ncit:C3868" + } + ] + } + }] }