Skip to content

Commit

Permalink
wip: more cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Jan 30, 2025
1 parent 08d8c79 commit 42a48b0
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 35 deletions.
10 changes: 8 additions & 2 deletions src/metakb/transformers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from abc import ABC, abstractmethod
from enum import Enum
from pathlib import Path
from typing import ClassVar
from typing import ClassVar, TypeVar

from disease.schemas import (
SYSTEM_URI_TO_NAMESPACE as DISEASE_SYSTEM_URI_TO_NAMESPACE,
Expand Down Expand Up @@ -55,6 +55,8 @@
NormalizedGene: "gene",
}

# Type variable for cache objects; bound to ``_TransformedRecordsCache`` so only
# that class or its subclasses satisfy it. Used as the return annotation of the
# transformer's ``_create_cache`` hook.
_CacheType = TypeVar("_CacheType", bound="_TransformedRecordsCache")


def _sanitize_name(name: str) -> str:
"""Trim leading and trailing whitespace and replace whitespace characters with
Expand Down Expand Up @@ -289,7 +291,7 @@ def __init__(
:param Optional[Path] harvester_path: Path to previously harvested data
:param ViccNormalizers normalizers: normalizer collection instance
"""
self._cache: _TransformedRecordsCache
self._cache = self._create_cache()
self.name = self.__class__.__name__.lower().split("transformer")[0]
self.data_dir = data_dir / self.name
self.harvester_path = harvester_path
Expand All @@ -308,6 +310,10 @@ async def transform(self, harvested_data: _HarvestedData) -> None:
:param harvested_data: Source harvested data
"""

@abstractmethod
def _create_cache(self) -> _CacheType:
    """Create cache for transformed records.

    Abstract hook that each concrete transformer must override. It is called as
    ``self._create_cache()`` during initialization to populate ``self._cache``,
    so it has to accept ``self`` like the overriding implementations do.

    :return: Cache instance for the transformer's transformed records
    """

def extract_harvested_data(self) -> _HarvestedData:
"""Get harvested data from file.
Expand Down
12 changes: 8 additions & 4 deletions src/metakb/transformers/civic.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ class _VariationCache(BaseModel):
aliases: list[Extension] | None = None
coordinates: dict | None = None
members: list[Variation] | None = None
extensions: list[Extension] = []
extensions: list[Extension] | None = None


class SourcePrefix(str, Enum):
Expand Down Expand Up @@ -192,7 +192,11 @@ def __init__(
self.processed_data.methods = [
self.methods_mapping[MethodId.CIVIC_EID_SOP.value]
]
self._cache = _CivicTransformedCache()
self._cache = self._create_cache()

def _create_cache(self) -> _CivicTransformedCache:
    """Instantiate the cache that holds transformed CIViC records.

    :return: New ``_CivicTransformedCache`` instance
    """
    civic_cache = _CivicTransformedCache()
    return civic_cache

@staticmethod
def _mp_to_variant_mapping(molecular_profiles: list[dict]) -> tuple[list, dict]:
Expand Down Expand Up @@ -611,7 +615,7 @@ def _is_supported_variant_query(variant_name: str, variant_id: int) -> bool:

return True

async def _get_variation_members(self, variant: dict) -> list[Variation] | None:
async def _get_variation_members(self, variant: dict) -> list[Variation]:
"""Get members field for variation object. This is the related variant concepts.
:param variant: CIViC Variant record
Expand Down Expand Up @@ -701,7 +705,7 @@ async def _add_variations(self, variants: list[dict]) -> None:
label="_".join(vt["name"].lower().split()),
)
for vt in variant["variant_types"]
if vt and vt["url"]
if vt and vt["url"] # system is required
]

# Get mappings
Expand Down
50 changes: 21 additions & 29 deletions src/metakb/transformers/moa.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ def __init__(
self.processed_data.methods = [
self.methods_mapping[MethodId.MOA_ASSERTION_BIORXIV.value]
]
self._cache = _MoaTransformedCache()
self._cache = self._create_cache()

def _create_cache(self) -> _MoaTransformedCache:
    """Instantiate the cache that holds transformed MOA records.

    :return: New ``_MoaTransformedCache`` instance
    """
    moa_cache = _MoaTransformedCache()
    return moa_cache

async def transform(self, harvested_data: MoaHarvestedData) -> None:
"""Transform MOA harvested JSON to common data model. Will store transformed
Expand Down Expand Up @@ -223,10 +227,11 @@ async def _add_categorical_variants(self, variants: list[dict]) -> None:
moa_variation = None
gene = variant.get("gene") or variant.get("gene1")
moa_gene = self._cache.genes[_sanitize_name(gene)]
protein_change = variant.get("protein_change")
constraints = None
extensions = []

if "rearrangement_type" in variant:
if "rearrangement_type" in variant or not protein_change:
logger.debug(
"Variation Normalizer does not support %s: %s",
moa_variant_id,
Expand All @@ -237,37 +242,24 @@ async def _add_categorical_variants(self, variants: list[dict]) -> None:
# Form query and run through variation-normalizer
# For now, the normalizer only support amino acid substitution
vrs_variation = None
if variant.get("protein_change"):
gene = moa_gene.label
query = f"{gene} {variant['protein_change'][2:]}"
vrs_variation = await self.vicc_normalizers.normalize_variation(
[query]
)

if not vrs_variation:
logger.debug(
"Variation Normalizer unable to normalize: moa.variant: %s using query: %s",
variant_id,
query,
)
extensions.append(self._get_vicc_normalizer_failure_ext())
else:
# Create VRS Variation object
params = vrs_variation.model_dump(exclude_none=True)
moa_variant_id = f"moa.variant:{variant_id}"
params["id"] = vrs_variation.id
moa_variation = Variation(**params)
constraints = [
DefiningAlleleConstraint(allele=moa_variation.root)
]
gene = moa_gene.label
query = f"{gene} {protein_change[2:]}"
vrs_variation = await self.vicc_normalizers.normalize_variation([query])

else:
if not vrs_variation:
logger.debug(
"Variation Normalizer does not support %s: %s",
moa_variant_id,
feature,
"Variation Normalizer unable to normalize: moa.variant: %s using query: %s",
variant_id,
query,
)
extensions.append(self._get_vicc_normalizer_failure_ext())
else:
# Create VRS Variation object
params = vrs_variation.model_dump(exclude_none=True)
moa_variant_id = f"moa.variant:{variant_id}"
params["id"] = vrs_variation.id
moa_variation = Variation(**params)
constraints = [DefiningAlleleConstraint(allele=moa_variation.root)]

# Add MOA representative coordinate data to extensions
coordinates_keys = [
Expand Down

0 comments on commit 42a48b0

Please sign in to comment.