Skip to content

Commit

Permalink
wip: very messy initial work
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Jan 28, 2025
1 parent 53770c7 commit f11dc65
Show file tree
Hide file tree
Showing 7 changed files with 499 additions and 218 deletions.
4 changes: 2 additions & 2 deletions src/metakb/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from neo4j import Driver, ManagedTransaction

from metakb.database import get_driver
from metakb.transformers.base import NORMALIZER_PRIORITY_EXT_NAME, TherapyType
from metakb.transformers.base import NormalizerExtensionName, TherapyType

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -46,7 +46,7 @@ def _add_mappings_and_exts_to_obj(obj: dict, obj_keys: list[str]) -> None:
for mapping in obj["mappings"]:
extensions = mapping.get("extensions") or []
for ext in extensions:
if ext["name"] == NORMALIZER_PRIORITY_EXT_NAME and ext["value"]:
if ext["name"] == NormalizerExtensionName.PRIORITY and ext["value"]:
normalizer_id = mapping["coding"]["code"]
obj["normalizer_id"] = normalizer_id
obj_keys.append("normalizer_id:$normalizer_id")
Expand Down
81 changes: 46 additions & 35 deletions src/metakb/transformers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,12 @@
NormalizedGene: "gene",
}

# Normalizer priority extension name
NORMALIZER_PRIORITY_EXT_NAME = "vicc_normalizer_priority"

class NormalizerExtensionName(str, Enum):
    """Enumerate the extension names attached to normalizer output.

    Members also subclass ``str``, so each one compares equal to its raw string
    value.
    """

    # Name of the extension whose truthy value marks a mapping as the priority
    # (merged) normalizer ID
    PRIORITY = "vicc_normalizer_priority"
    # Name of the extension used to flag a VICC normalizer failure
    FAILURE = "vicc_normalizer_failure"


class EcoLevel(str, Enum):
Expand Down Expand Up @@ -112,6 +116,14 @@ class ViccConceptVocab(BaseModel):
definition: StrictStr


class _Cache(BaseModel):
    """Hold previously transformed records so they can be looked up again.

    Every attribute is annotated as a ``ClassVar``, so each dictionary is stored
    on the class itself rather than on individual instances.
    """

    # Therapy concepts; ``_add_therapy`` reads and writes this by therapy ID
    therapies: ClassVar[dict[str, MappableConcept]] = {}
    # Condition concepts (presumably keyed by source condition ID -- confirm)
    conditions: ClassVar[dict[str, MappableConcept]] = {}
    # Gene concepts (presumably keyed by source gene ID -- confirm)
    genes: ClassVar[dict[str, MappableConcept]] = {}


class TransformedData(BaseModel):
"""Define model for transformed data"""

Expand Down Expand Up @@ -266,23 +278,14 @@ def __init__(
:param Optional[Path] harvester_path: Path to previously harvested data
:param ViccNormalizers normalizers: normalizer collection instance
"""
self._cache: _Cache
self.name = self.__class__.__name__.lower().split("transformer")[0]
self.data_dir = data_dir / self.name
self.harvester_path = harvester_path

self.vicc_normalizers = (
ViccNormalizers() if normalizers is None else normalizers
)

self.processed_data = TransformedData()

# Cache for concepts that were unable to normalize. Set of source concept IDs
self.able_to_normalize = {}
self.unable_to_normalize = {
"conditions": set(),
"therapies": set(),
}

self.evidence_level_to_vicc_concept_mapping = (
self._evidence_level_to_vicc_concept_mapping()
)
Expand Down Expand Up @@ -396,6 +399,14 @@ def _get_digest_for_str_lists(str_list: list[str]) -> str:
)
return sha512t24u(blob)

@staticmethod
def _get_vicc_normalizer_failure_ext() -> Extension:
    """Build the extension that flags a VICC normalizer failure.

    :return: Extension named with the normalizer failure extension name and
        carrying a value of ``True``
    """
    failure_ext_name = NormalizerExtensionName.FAILURE.value
    return Extension(name=failure_ext_name, value=True)

@abstractmethod
def _get_therapy(self, therapy: dict) -> MappableConcept | None:
"""Get therapy mappable concept for source therapy object
Expand Down Expand Up @@ -484,7 +495,7 @@ def _add_therapy(
therapies: list[dict],
therapy_type: TherapyType,
therapy_interaction_type: str | None = None,
) -> MappableConcept | None:
) -> MappableConcept:
"""Create or get therapy mappable concept given therapies
First look in cache for existing therapy, if not found will attempt to
normalize. Will add `therapy_id` to `therapies` and
Expand All @@ -497,32 +508,29 @@ def _add_therapy(
`TherapyType.THERAPY`, the list will only contain a single therapy.
:param therapy_type: The type of therapy
:param therapy_interaction_type: drug interaction type
:return: Therapy mappable concept, if successful normalization
:return: Therapy mappable concept
"""
therapy = self.able_to_normalize["therapies"].get(therapy_id)
therapy = self._cache.therapies.get(therapy_id)
if therapy:
return therapy

if therapy_id not in self.unable_to_normalize["therapies"]:
if therapy_type == TherapyType.THERAPY:
therapy = self._get_therapy(therapies[0])
elif therapy_type == TherapyType.THERAPEUTIC_SUBSTITUTE_GROUP:
therapy = self._get_therapeutic_substitute_group(
therapy_id, therapies, therapy_interaction_type
)
elif therapy_type == TherapyType.COMBINATION_THERAPY:
therapy = self._get_combination_therapy(
therapy_id, therapies, therapy_interaction_type
)
else:
# not supported
return None
if therapy_type == TherapyType.THERAPY:
therapy = self._get_therapy(therapies[0])
elif therapy_type == TherapyType.THERAPEUTIC_SUBSTITUTE_GROUP:
therapy = self._get_therapeutic_substitute_group(
therapy_id, therapies, therapy_interaction_type
)
elif therapy_type == TherapyType.COMBINATION_THERAPY:
therapy = self._get_combination_therapy(
therapy_id, therapies, therapy_interaction_type
)
else:
# not supported
return None

self._cache.therapies[therapy_id] = therapy
self.processed_data.therapies.append(therapy)

if therapy:
self.able_to_normalize["therapies"][therapy_id] = therapy
self.processed_data.therapies.append(therapy)
else:
self.unable_to_normalize["therapies"].add(therapy_id)
return therapy

@staticmethod
Expand Down Expand Up @@ -552,7 +560,7 @@ def _add_merged_id_ext(
:return: ConceptMapping with normalizer extension added
"""
merged_id_ext = Extension(
name=NORMALIZER_PRIORITY_EXT_NAME, value=is_priority
name=NormalizerExtensionName.PRIORITY.value, value=is_priority
)
if mapping.extensions:
mapping.extensions.append(merged_id_ext)
Expand All @@ -567,6 +575,9 @@ def _add_merged_id_ext(
mappings: list[ConceptMapping] = []
attr_name = NORMALIZER_INSTANCE_TO_ATTR[type(normalizer_resp)]
normalizer_resp_obj = getattr(normalizer_resp, attr_name)
if not normalizer_resp_obj:
return mappings

normalizer_mappings = normalizer_resp_obj.mappings or []
if isinstance(normalizer_resp, NormalizedDisease):
for mapping in normalizer_mappings:
Expand Down
Loading

0 comments on commit f11dc65

Please sign in to comment.