ONSdigital · liamtoozer · May 8, 2024 · Mar 19, 2024 · Mar 20, 2024 · Mar 20, 2024
@@ -158,3 +158,8 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
+
+# To unignore the mock_data/test/ path, we need to unignore the parent directory mock_data/ first
+!mock_data/
+mock_data/*
+!mock_data/test
@@ -29,5 +29,7 @@ COPY . /app
 # Expose the port that Gunicorn will listen on
 EXPOSE 5003
 
+RUN make load-mock-unit-data
+
 # Start Gunicorn to serve the application
 CMD ["gunicorn", "app.main:app", "-b", "0.0.0.0:5003", "--worker-class", "uvicorn.workers.UvicornWorker", "--timeout", "0"]
@@ -10,3 +10,6 @@ lint:
 test:
 	poetry run pytest .
 
+load-mock-unit-data:
+	./scripts/load_mock_data.sh
+
@@ -18,11 +18,23 @@ poetry install
 
 ## Running Locally
 
+### Prerequisites
+To launch business surveys that make use of supplementary data, you'll first need to pull down example unit data from 
+the [sds-schema-definitions](https://github.com/ONSdigital/sds-schema-definitions/tree/main/examples) examples. To do 
+this, run the following command:
+
+```bash
+make load-mock-unit-data
+```
+
+**IMPORTANT:** The hardcoded `MOCK_DATA_PATHS_BY_SURVEY_ID` in `app/main.py` will need to be updated if **any** of the 
+`sds-schema-definitions` example folders change.
+
+### Running
 To run the FastAPI application locally using `uvicorn`, use the following command:
 
 ```bash
 make run
-
 ```
 
 The application will be accessible at `http://localhost:5003`.

@@ -1,67 +1,212 @@
+import hashlib
 import json
+from datetime import datetime, timedelta, timezone
+from functools import lru_cache
+from pathlib import Path
 from typing import MutableMapping
 from uuid import UUID
 
 import uvicorn
 import yaml
 from fastapi import FastAPI, Query, HTTPException
+from pydantic import BaseModel
 from sdc.crypto.jwe_helper import JWEHelper
 from sdc.crypto.key_store import KeyStore
 
-KEY_PURPOSE_SDS = "supplementary_data"
-
 app = FastAPI()
 
 with open("dev-keys.yml", encoding="UTF-8") as keys_file:
     keys = KeyStore(yaml.safe_load(keys_file))
 
+KEY_PURPOSE_SDS = "supplementary_data"
+MOCK_DATA_ROOT_PATH = Path(__file__).parent.parent / "mock_data"
+
+# period_id to match with Launcher
+PERIOD_ID = "201605"
+
+# Currently only have a single schema version
+SCHEMA_VERSION = "v1.0.0"
+
+# Currently only have 1 reporting unit example for each schema version
+TOTAL_REPORTING_UNITS = 1
+
+FORM_TYPES = ["001"]
+
+# Hardcoded paths for now - update if any changes are made to https://github.com/ONSdigital/sds-schema-definitions/tree/main/examples
+MOCK_DATA_PATHS_BY_SURVEY_ID = {
+    "test": ["123"],
+    "prodcom": ["014"],
+    "bres_and_brs": [
+        "221",  # BRES
+        "241",  # BRS
+    ],
+    "roofing_tiles_slate": [
+        "068",  # Roofing tiles
+        "071",  # Slate
+    ],
+    "sand_and_gravel": [
+        "066",  # Sand & Gravel (Land Won)
+        "076",  # Sand & Gravel (Marine Dredged)
+    ],
+}
+
+
+class DatasetMetadata(BaseModel):
+    """
+    Model for SDS Metadata Response
+    copied from https://github.com/ONSdigital/sds/blob/main/src/app/models/dataset_models.py#L19
+    """
+
+    # Required fields
+    survey_id: str
+    period_id: str
+    form_types: list[str]
+    sds_published_at: str
+    total_reporting_units: int
+    schema_version: str
+    sds_dataset_version: int
+    filename: str
+    dataset_id: UUID
+
+    # Optional fields
+    title: str | None = None
+
+
+class UnitData(BaseModel):
+    """
+    Model for SDS Unit Data Response
+    copied from https://github.com/ONSdigital/sds/blob/main/src/app/models/dataset_models.py#L53
+    """
+
+    dataset_id: UUID
+    survey_id: str
+    period_id: str
+    form_types: list[str]
+    schema_version: str
+    data: str
+
 
 @app.get("/v1/unit_data")
-def get_sds_data(
-    dataset_id: UUID, identifier: str = Query(min_length=1)
-) -> MutableMapping:
+def get_unit_data(dataset_id: UUID, identifier: str = Query(min_length=1)) -> UnitData:
     # The mock current does not make use of identifier
-    guid_filename_map = {
-        "c067f6de-6d64-42b1-8b02-431a3486c178": "supplementary_data",
-        "693dc252-2e90-4412-bd9c-c4d953e36fcd": "supplementary_data_v2",
-        "9b418603-ba90-4c93-851a-f9cecfbda06f": "supplementary_data_v3",
-    }
+    """Return the mocked unit data (with its encrypted `data` payload) for the given dataset_id, or 404 if unknown"""
+    _, dataset_to_unit_data_map = load_mock_data()
 
-    if filename := guid_filename_map.get(str(dataset_id)):
-        return encrypt_mock_data(load_mock_data(f"mock_data/{filename}.json"))
+    if unit_data := dataset_to_unit_data_map.get(dataset_id):
+        return unit_data
 
     raise HTTPException(status_code=404)
 
 
 @app.get("/v1/dataset_metadata")
-def get_sds_dataset_ids(
+def get_dataset_metadata(
     survey_id: str = Query(min_length=1), period_id: str = Query(min_length=1)
-) -> list[dict]:
-    # The mock current does not make use of period_id
-    return load_mock_sds_dataset_metadata(survey_id)
-
-
-def load_mock_data(filename: str) -> dict | list:
-    with open(filename, encoding="utf-8") as mock_data_file:
-        return json.load(mock_data_file)
+) -> list[DatasetMetadata]:
+    """Return a list of dataset metadata for the given survey_id"""
+    # The mock currently does not make use of period_id
+    dataset_metadata_collection, _ = load_mock_data()
+
+    if filtered_dataset_metadata_by_survey_id := [
+        dataset_metadata
+        for dataset_metadata in dataset_metadata_collection
+        if dataset_metadata.survey_id == survey_id
+    ]:
+        return filtered_dataset_metadata_by_survey_id
+    raise HTTPException(status_code=404)
 
 
-def load_mock_sds_dataset_metadata(survey_id: str) -> list[dict]:
-    survey_id_filename_map = {
-        "123": "supplementary_dataset_metadata_response",
+def get_mocked_chronological_date(schema_version: str) -> str:
+    """Offset the current UTC time by the version number (in weeks) so mocked dates appear 'chronological'"""
+    chronological_date = datetime.now(timezone.utc) + timedelta(
+        weeks=get_version_number(schema_version)
+    )
+    return chronological_date.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+def get_version_number(dataset_version: str) -> int:
+    return int(dataset_version[1:])
+
+
+def build_dataset_metadata(
+    *, survey_id: str, period_id: str, dataset_id: UUID, path: Path
+) -> DatasetMetadata:
+    dataset_metadata = {
+        "survey_id": survey_id,
+        "period_id": period_id,
+        "form_types": FORM_TYPES,
+        "sds_published_at": get_mocked_chronological_date(path.stem),
+        "total_reporting_units": TOTAL_REPORTING_UNITS,
+        "schema_version": SCHEMA_VERSION,
+        "sds_dataset_version": get_version_number(path.stem),
+        "filename": "",
+        "dataset_id": dataset_id,
+        "title": f"{path.stem} {path.parent.name} supplementary data",
     }
 
-    if filename := survey_id_filename_map.get(survey_id):
-        return load_mock_data(f"mock_data/{filename}.json")
+    return DatasetMetadata.model_validate({**dataset_metadata}, from_attributes=True)
 
-    raise HTTPException(status_code=404)
 
+def build_unit_data(
+    *, survey_id: str, period_id: str, dataset_id: UUID, path: Path
+) -> UnitData:
+    unit_data = {
+        "dataset_id": dataset_id,
+        "survey_id": survey_id,
+        "period_id": period_id,
+        "form_types": FORM_TYPES,
+        "schema_version": SCHEMA_VERSION,
+        "data": encrypt_mock_data(json.loads(path.read_text())),
+    }
 
-def encrypt_mock_data(mock_data: MutableMapping) -> MutableMapping:
+    return UnitData.model_validate({**unit_data}, from_attributes=True)
+
+
+def generate_dataset_id(
+    *, survey_id: str, schema_version: str, dataset_version: str, period_id: str
+) -> UUID:
+    """Deterministically generate a dataset_id"""
+    combined_hash = hashlib.sha256(
+        f"{survey_id}_{schema_version}_{dataset_version}_{period_id}".encode("utf-8")
+    ).hexdigest()
+    return UUID(combined_hash[:32])
+
+
+@lru_cache(maxsize=1)
+def load_mock_data() -> tuple[list[DatasetMetadata], dict[UUID, UnitData]]:
+    dataset_metadata_collection: list[DatasetMetadata] = []
+    dataset_id_unit_data_map: dict[UUID, UnitData] = {}
+
+    for survey_mock_data_path in MOCK_DATA_ROOT_PATH.glob("*"):
+        survey_ids = MOCK_DATA_PATHS_BY_SURVEY_ID.get(survey_mock_data_path.name, [])
+        for survey_id in survey_ids:
+            for path in survey_mock_data_path.iterdir():
+                dataset_id = generate_dataset_id(
+                    survey_id=survey_id,
+                    schema_version=SCHEMA_VERSION,
+                    dataset_version=path.stem,
+                    period_id=PERIOD_ID,
+                )
+                dataset_metadata_collection.append(
+                    build_dataset_metadata(
+                        survey_id=survey_id,
+                        period_id=PERIOD_ID,
+                        dataset_id=dataset_id,
+                        path=path,
+                    )
+                )
+                dataset_id_unit_data_map[dataset_id] = build_unit_data(
+                    survey_id=survey_id,
+                    period_id=PERIOD_ID,
+                    dataset_id=dataset_id,
+                    path=path,
+                )
+
+    return dataset_metadata_collection, dataset_id_unit_data_map
+
+
+def encrypt_mock_data(mock_data: MutableMapping) -> str:
     key = keys.get_key(purpose=KEY_PURPOSE_SDS, key_type="private")
-    mock_data["data"] = JWEHelper.encrypt_with_key(
-        json.dumps(mock_data["data"]), key.kid, key.as_jwk()
-    )
+    mock_data = JWEHelper.encrypt_with_key(json.dumps(mock_data), key.kid, key.as_jwk())
     return mock_data