From ad03380c2c9aa1b20d29bad49a0ce396a4135d37 Mon Sep 17 00:00:00 2001
From: Victor San Kho Lin
Date: Sat, 4 Jan 2025 21:08:48 +1100
Subject: [PATCH] Added raw schema link between Library hub to ExternalSubject hub

---
 Reviewer note: the SQL hunk below was edited after export, so the blob id on
 its `index` line no longer matches the edited content.

 .../raw/link_library_external_subject.sql     | 80 +++++++++++++++++++
 orcavault/models/raw/link_schema.yml          | 26 ++++++
 2 files changed, 106 insertions(+)
 create mode 100644 orcavault/models/raw/link_library_external_subject.sql

diff --git a/orcavault/models/raw/link_library_external_subject.sql b/orcavault/models/raw/link_library_external_subject.sql
new file mode 100644
index 0000000..bd6a8e7
--- /dev/null
+++ b/orcavault/models/raw/link_library_external_subject.sql
@@ -0,0 +1,80 @@
+-- Raw vault link between the Library hub and the ExternalSubject hub.
+-- Emits one row per distinct (library_id, trimmed external_subject_id) pair.
+
+with source as (
+
+    -- Collect candidate pairs from every ODS source. "union all" suffices
+    -- because the cleaned CTE de-duplicates after trimming.
+    select
+        library_id,
+        external_subject_id
+    from
+        {{ source('ods', 'data_portal_labmetadata') }}
+
+    union all
+
+    select
+        library_id,
+        external_subject_id
+    from
+        {{ source('ods', 'data_portal_limsrow') }}
+
+    union all
+
+    select
+        lib.library_id as library_id,
+        sbj.subject_id as external_subject_id
+    from
+        {{ source('ods', 'metadata_manager_library') }} as lib
+        inner join {{ source('ods', 'metadata_manager_subject') }} as sbj
+            on sbj.orcabus_id = lib.subject_orcabus_id
+
+),
+
+cleaned as (
+
+    -- Filter on the trimmed value so whitespace-only subject ids are dropped
+    -- rather than collapsing to an empty business key.
+    -- NOTE(review): keys must be normalised exactly as in the hub models or
+    -- the hash keys computed below will not match them; confirm.
+    select distinct
+        library_id,
+        trim(external_subject_id) as external_subject_id
+    from
+        source
+    where
+        library_id is not null
+        and library_id <> ''
+        and external_subject_id is not null
+        and trim(external_subject_id) <> ''
+
+),
+
+transformed as (
+
+    -- Hash keys are the hex-encoded SHA-256 digest of each business key.
+    select
+        encode(sha256(cast(external_subject_id as bytea)), 'hex') as external_subject_hk,
+        encode(sha256(cast(library_id as bytea)), 'hex') as library_hk,
+        cast('{{ run_started_at }}' as timestamptz) as load_datetime,
+        (select 'lab') as record_source
+    from
+        cleaned
+
+),
+
+final as (
+
+    -- The link hash key is the SHA-256 of both hub hash keys concatenated.
+    select
+        encode(sha256(cast(concat(external_subject_hk, library_hk) as bytea)), 'hex') as library_external_subject_hk,
+        external_subject_hk,
+        library_hk,
+        load_datetime,
+        record_source
+    from
+        transformed
+
+)
+
+select * from final
diff --git a/orcavault/models/raw/link_schema.yml b/orcavault/models/raw/link_schema.yml
index 32a7d8e..948e229 100644
--- a/orcavault/models/raw/link_schema.yml
+++ b/orcavault/models/raw/link_schema.yml
@@ -80,6 +80,32 @@ models:
       - name: record_source
         data_type: varchar(255)
 
+  - name: link_library_external_subject
+    config:
+      contract: { enforced: true }
+    constraints:
+      - type: primary_key
+        columns: [ library_external_subject_hk ]
+      - type: foreign_key
+        columns: [ external_subject_hk ]
+        to: ref('hub_external_subject')
+        to_columns: [ external_subject_hk ]
+      - type: foreign_key
+        columns: [ library_hk ]
+        to: ref('hub_library')
+        to_columns: [ library_hk ]
+    columns:
+      - name: library_external_subject_hk
+        data_type: char(64)
+      - name: external_subject_hk
+        data_type: char(64)
+      - name: library_hk
+        data_type: char(64)
+      - name: load_datetime
+        data_type: timestamptz
+      - name: record_source
+        data_type: varchar(255)
+
   - name: link_library_experiment
     config:
       contract: { enforced: true }