Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support business prepop survey launch #11

Merged
merged 25 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
0bd66b1
First pass
liamtoozer Mar 19, 2024
0685bbe
Tweak gitignore to ignore test data subpaths
liamtoozer Mar 20, 2024
d330140
Update end points. Use inner functions for path traversal for JSON fi…
liamtoozer Mar 20, 2024
c2e1ca4
Renaming
liamtoozer Mar 20, 2024
d2c497c
Formatting & move startup script to scripts folder
liamtoozer Mar 21, 2024
53a888f
Add clone script
liamtoozer Mar 21, 2024
3ad1816
Update tests. Move test mock files. Formatting
liamtoozer Mar 21, 2024
ba2f30f
Clean up of files
liamtoozer Mar 21, 2024
aa516c7
Rearrange title
liamtoozer Mar 22, 2024
94cacc0
Tweak Makefile
liamtoozer Mar 22, 2024
9745d9f
Remove test data files to fit new format
liamtoozer Mar 22, 2024
f2bb0ec
Add readme info. Fix spelling
liamtoozer Mar 22, 2024
2f90b94
Remove file modification script - will now mirror the sds schema repo…
liamtoozer Mar 27, 2024
d49dc41
Remove calls to python script from Makefile
liamtoozer Mar 27, 2024
cdc4a75
Remove info from README about folder rearrange script
liamtoozer Mar 27, 2024
3e5b2b5
Rename mocked unit data items
liamtoozer Mar 27, 2024
761b421
Move pr-convenience to separate script
liamtoozer Apr 3, 2024
18cc3d2
Update Docker dependencies. Change PR script to be executable. Tweak …
liamtoozer Apr 3, 2024
2e95327
Remove period_id from dataset ID generate. Update test. Remove unders…
liamtoozer Apr 8, 2024
98b5902
Formatting
liamtoozer Apr 8, 2024
4863a10
Merge branch 'main' into support-real-prepop-survey-launch
liamtoozer Apr 10, 2024
01d0b2d
Remove PR-convenience script after dependent sds-schema-definitions r…
liamtoozer Apr 23, 2024
a8c0d0c
Remove unnecessary SDS_SCHEMA_DEFINITIONS_REPO var
liamtoozer Apr 23, 2024
a0854b2
Merge branch 'main' into support-real-prepop-survey-launch
liamtoozer Apr 29, 2024
99d2d52
Merge branch 'main' into support-real-prepop-survey-launch
liamtoozer May 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,8 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

# To unignore the mock_data/test/ path, we need to unignore the parent directory mock_data/ first
!mock_data/
mock_data/*
!mock_data/test
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,7 @@ COPY . /app
# Expose the port that Gunicorn will listen on
EXPOSE 5003

RUN make load-mock-unit-data
petechd marked this conversation as resolved.
Show resolved Hide resolved

# Start Gunicorn to serve the application
CMD ["gunicorn", "app.main:app", "-b", "0.0.0.0:5003", "--worker-class", "uvicorn.workers.UvicornWorker", "--timeout", "0"]
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ lint:
test:
poetry run pytest .

load-mock-unit-data:
./scripts/load_mock_data.sh

14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,23 @@ poetry install

## Running Locally

### Prerequisites
To launch business surveys that make use of supplementary data, you'll first need to pull down example unit data from
the [sds-schema-definitions](https://github.com/ONSdigital/sds-schema-definitions/tree/main/examples) examples. To do
this, run the following command:

```bash
make load-mock-unit-data
```

**IMPORTANT:** The hardcoded `MOCK_DATA_PATHS_BY_SURVEY_ID` in `app/main.py` will need to be updated if **any** of the
`sds-schema-definitions` example folders change.

## Running
To run the FastAPI application locally using `uvicorn`, use the following command:

```bash
make run

```

The application will be accessible at `http://localhost:5003`.
Expand Down
207 changes: 176 additions & 31 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,212 @@
import hashlib
import json
from datetime import datetime, timedelta, timezone
from functools import lru_cache
from pathlib import Path
from typing import MutableMapping
from uuid import UUID

import uvicorn
import yaml
from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel
from sdc.crypto.jwe_helper import JWEHelper
from sdc.crypto.key_store import KeyStore

app = FastAPI()

# The key store is loaded once, at import time, from a path relative to the
# current working directory.
with open("dev-keys.yml", encoding="UTF-8") as keys_file:
    keys = KeyStore(yaml.safe_load(keys_file))

# Purpose used to look up the key that encrypts the mocked unit data
KEY_PURPOSE_SDS = "supplementary_data"

# Root folder holding one sub-folder of mock unit data per example
MOCK_DATA_ROOT_PATH = Path(__file__).parent.parent / "mock_data"

# period_id to match with Launcher
PERIOD_ID = "201605"

# Currently only have a single schema version
SCHEMA_VERSION = "v1.0.0"

# Currently only have 1 reporting unit example for each schema version
TOTAL_REPORTING_UNITS = 1

FORM_TYPES = ["001"]
# Hardcoded paths for now - update if any changes are made to
# https://github.com/ONSdigital/sds-schema-definitions/tree/main/examples
# Keys are folder names under the mock data root; values are the survey IDs
# served from that folder.
MOCK_DATA_PATHS_BY_SURVEY_ID = {
    "test": ["123"],
    "prodcom": ["014"],
    "bres_and_brs": [
        "221",  # BRES
        "241",  # BRS
    ],
    "roofing_tiles_slate": [
        "068",  # Roofing tiles
        "071",  # Slate
    ],
    "sand_and_gravel": [
        "066",  # Sand & Gravel (Land Won)
        "076",  # Sand & Gravel (Marine Dredged)
    ],
}


class DatasetMetadata(BaseModel):
    """
    Model for SDS Metadata Response
    copied from https://github.com/ONSdigital/sds/blob/main/src/app/models/dataset_models.py#L19
    """

    # Required fields
    survey_id: str
    period_id: str
    form_types: list[str]
    sds_published_at: str
    total_reporting_units: int
    schema_version: str
    sds_dataset_version: int
    filename: str
    dataset_id: UUID

    # Optional fields
    title: str | None = None


class UnitData(BaseModel):
    """
    Response model for SDS unit data.

    Mirrors the upstream SDS model at
    https://github.com/ONSdigital/sds/blob/main/src/app/models/dataset_models.py#L53
    """

    # Identifies the dataset this unit data belongs to
    dataset_id: UUID

    # Survey / collection context
    survey_id: str
    period_id: str
    form_types: list[str]
    schema_version: str

    # Unit data payload, carried as a string
    data: str


@app.get("/v1/unit_data")
def get_unit_data(dataset_id: UUID, identifier: str = Query(min_length=1)) -> UnitData:
    """Return an encrypted map of mocked unit data for the given dataset_id

    Raises an HTTPException with status 404 when no mocked unit data exists
    for dataset_id.
    """
    # The mock currently does not make use of identifier
    _, dataset_to_unit_data_map = load_mock_data()

    unit_data = dataset_to_unit_data_map.get(dataset_id)
    if unit_data is None:
        raise HTTPException(status_code=404)

    return unit_data


@app.get("/v1/dataset_metadata")
def get_dataset_metadata(
    survey_id: str = Query(min_length=1), period_id: str = Query(min_length=1)
) -> list[DatasetMetadata]:
    """Return a list of dataset metadata for the given survey_id

    Raises an HTTPException with status 404 when no mocked dataset metadata
    matches survey_id.
    """
    # The mock currently does not make use of period_id
    dataset_metadata_collection, _ = load_mock_data()

    matching_dataset_metadata = [
        dataset_metadata
        for dataset_metadata in dataset_metadata_collection
        if dataset_metadata.survey_id == survey_id
    ]
    if not matching_dataset_metadata:
        raise HTTPException(status_code=404)

    return matching_dataset_metadata


def load_mock_sds_dataset_metadata(survey_id: str) -> list[dict]:
survey_id_filename_map = {
"123": "supplementary_dataset_metadata_response",
def get_mocked_chronological_date(schema_version: str) -> str:
    """Return a mocked publish timestamp that grows with the version number.

    The current UTC time is pushed forward by one week per version number, so
    a higher version always yields a later date. The argument is handed to
    get_version_number, so it must be of the form "v<N>".
    """
    weeks_ahead = get_version_number(schema_version)
    mocked_date = datetime.now(timezone.utc) + timedelta(weeks=weeks_ahead)
    return mocked_date.strftime("%Y-%m-%dT%H:%M:%SZ")


def get_version_number(dataset_version: str) -> int:
    """Return the numeric part of a version string such as "v3".

    Raises:
        ValueError: if dataset_version is not a "v" followed by ASCII digits.
    """
    prefix, digits = dataset_version[:1], dataset_version[1:]
    # Check the prefix explicitly: blindly dropping the first character would
    # turn e.g. "21" into 1 without any error.
    if prefix not in ("v", "V") or not (digits.isascii() and digits.isdigit()):
        raise ValueError(
            f"Expected a version of the form 'v<number>', got {dataset_version!r}"
        )
    return int(digits)


def build_dataset_metadata(
    *, survey_id: str, period_id: str, dataset_id: UUID, path: Path
) -> DatasetMetadata:
    """Build the DatasetMetadata entry for a single mock data file.

    The file name without its extension (path.stem) is used as the dataset
    version: it drives both sds_dataset_version and the mocked publish date.
    The title combines that stem with the name of the containing folder.
    """
    dataset_metadata = {
        "survey_id": survey_id,
        "period_id": period_id,
        "form_types": FORM_TYPES,
        "sds_published_at": get_mocked_chronological_date(path.stem),
        "total_reporting_units": TOTAL_REPORTING_UNITS,
        "schema_version": SCHEMA_VERSION,
        "sds_dataset_version": get_version_number(path.stem),
        # The mock does not provide a filename
        "filename": "",
        "dataset_id": dataset_id,
        "title": f"{path.stem} {path.parent.name} supplementary data",
    }

    return DatasetMetadata.model_validate({**dataset_metadata}, from_attributes=True)

raise HTTPException(status_code=404)

def build_unit_data(
    *, survey_id: str, period_id: str, dataset_id: UUID, path: Path
) -> UnitData:
    """Build the UnitData entry for a single mock data file.

    The JSON content of path is parsed and passed to encrypt_mock_data; the
    resulting string becomes the `data` field.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform default
    mock_data = json.loads(path.read_text(encoding="utf-8"))

    unit_data = {
        "dataset_id": dataset_id,
        "survey_id": survey_id,
        "period_id": period_id,
        "form_types": FORM_TYPES,
        "schema_version": SCHEMA_VERSION,
        "data": encrypt_mock_data(mock_data),
    }

    return UnitData.model_validate({**unit_data}, from_attributes=True)


def generate_dataset_id(
    *, survey_id: str, schema_version: str, dataset_version: str, period_id: str
) -> UUID:
    """Deterministically generate a dataset_id.

    The four inputs are joined with underscores and hashed with SHA-256; the
    first 32 hex characters of the digest form the UUID, so the same inputs
    always produce the same dataset_id.
    """
    seed = f"{survey_id}_{schema_version}_{dataset_version}_{period_id}"
    digest = hashlib.sha256(seed.encode("utf-8")).hexdigest()
    return UUID(hex=digest[:32])


@lru_cache(maxsize=1)
def load_mock_data() -> tuple[list[DatasetMetadata], dict[UUID, UnitData]]:
    """Load every mock data file and build its metadata and unit data.

    Each folder under MOCK_DATA_ROOT_PATH whose name is a key of
    MOCK_DATA_PATHS_BY_SURVEY_ID contributes one dataset per (survey_id, file)
    pair. The result is cached, so files are read once per process and all
    callers share the same list and dict objects.

    Returns:
        A tuple of (all dataset metadata, unit data keyed by dataset_id).
    """
    dataset_metadata_collection: list[DatasetMetadata] = []
    dataset_id_unit_data_map: dict[UUID, UnitData] = {}

    # Sorted so the result does not depend on the filesystem listing order
    for survey_mock_data_path in sorted(MOCK_DATA_ROOT_PATH.glob("*")):
        survey_ids = MOCK_DATA_PATHS_BY_SURVEY_ID.get(survey_mock_data_path.name, [])
        # Skip unmapped entries, and plain files that happen to share a mapped name
        if not survey_ids or not survey_mock_data_path.is_dir():
            continue

        # Only JSON files are mock data; this ignores strays such as .DS_Store
        mock_data_files = sorted(
            path
            for path in survey_mock_data_path.iterdir()
            if path.is_file() and path.suffix == ".json"
        )

        for survey_id in survey_ids:
            for path in mock_data_files:
                dataset_id = generate_dataset_id(
                    survey_id=survey_id,
                    schema_version=SCHEMA_VERSION,
                    dataset_version=path.stem,
                    period_id=PERIOD_ID,
                )
                dataset_metadata_collection.append(
                    build_dataset_metadata(
                        survey_id=survey_id,
                        period_id=PERIOD_ID,
                        dataset_id=dataset_id,
                        path=path,
                    )
                )
                dataset_id_unit_data_map[dataset_id] = build_unit_data(
                    survey_id=survey_id,
                    period_id=PERIOD_ID,
                    dataset_id=dataset_id,
                    path=path,
                )

    return dataset_metadata_collection, dataset_id_unit_data_map


def encrypt_mock_data(mock_data: MutableMapping) -> str:
    """Serialise mock_data to JSON and return it encrypted.

    The key is looked up in the module-level key store by KEY_PURPOSE_SDS.
    The input mapping is not modified.
    """
    key = keys.get_key(purpose=KEY_PURPOSE_SDS, key_type="private")
    return JWEHelper.encrypt_with_key(json.dumps(mock_data), key.kid, key.as_jwk())


Expand Down
Loading
Loading