Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added OrcaVault raw schema link models #2

Merged
merged 1 commit into from
Jan 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dev/src/load.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ PGPASSWORD=dev psql -h 0.0.0.0 -d orcavault -U dev <<EOF
\copy ods.metadata_manager_sample from '/data/orcavault_ods_metadata_manager_sample.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_individual from '/data/orcavault_ods_metadata_manager_individual.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_subject from '/data/orcavault_ods_metadata_manager_subject.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_subjectindividuallink from '/data/orcavault_ods_metadata_manager_subjectindividuallink.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_project from '/data/orcavault_ods_metadata_manager_project.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_libraryprojectlink from '/data/orcavault_ods_metadata_manager_libraryprojectlink.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_contact from '/data/orcavault_ods_metadata_manager_contact.csv' with (format csv, header true, delimiter ',');
EOF
54 changes: 27 additions & 27 deletions dev/src/ods.sql
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,8 @@ CREATE TABLE IF NOT EXISTS orcavault.ods.data_portal_libraryrun

CREATE TABLE IF NOT EXISTS orcavault.ods.sequence_run_manager_sequence
(
orcabus_id varchar not null
primary key,
instrument_run_id varchar(255) not null
unique,
orcabus_id varchar not null,
instrument_run_id varchar(255) not null,
run_volume_name text not null,
run_folder_path text,
run_data_uri text not null,
Expand All @@ -136,10 +134,8 @@ CREATE TABLE IF NOT EXISTS orcavault.ods.sequence_run_manager_sequence

CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_library
(
orcabus_id varchar not null
primary key,
library_id varchar
unique,
orcabus_id varchar not null,
library_id varchar,
phenotype varchar,
workflow varchar,
quality varchar,
Expand All @@ -152,47 +148,51 @@ CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_library

CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_sample
(
orcabus_id varchar not null
primary key,
sample_id varchar
unique,
orcabus_id varchar not null,
sample_id varchar,
external_sample_id varchar,
source varchar
);

CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_individual
(
orcabus_id varchar not null
primary key,
individual_id varchar
unique,
orcabus_id varchar not null,
individual_id varchar,
source varchar
);

CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_subject
(
orcabus_id varchar not null
primary key,
orcabus_id varchar not null,
subject_id varchar
unique
);

CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_subjectindividuallink
(
id bigint,
individual_orcabus_id varchar not null,
subject_orcabus_id varchar not null
);

CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_project
(
orcabus_id varchar not null
primary key,
project_id varchar
unique,
orcabus_id varchar not null,
project_id varchar,
name varchar,
description varchar
);

CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_libraryprojectlink
(
id bigint,
library_orcabus_id varchar not null,
project_orcabus_id varchar not null
);

CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_contact
(
orcabus_id varchar not null
primary key,
contact_id varchar
unique,
orcabus_id varchar not null,
contact_id varchar,
name varchar,
description varchar,
email varchar(254)
Expand Down
44 changes: 44 additions & 0 deletions orcavault/models/raw/link_library_experiment.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
with source as (

select library_id, experiment_id from {{ source('ods', 'data_portal_labmetadata') }}

),

cleaned as (

select
distinct library_id, experiment_id
from
source
where
(library_id is not null and library_id <> '') and
(experiment_id is not null and experiment_id <> '')

),

transformed as (

select
encode(sha256(cast(experiment_id as bytea)), 'hex') as experiment_hk,
encode(sha256(cast(library_id as bytea)), 'hex') as library_hk,
cast('{{ run_started_at }}' as timestamptz) as load_datetime,
(select 'lab') as record_source
from
cleaned

),

final as (

select
encode(sha256(concat(experiment_hk, library_hk)::bytea), 'hex') as library_experiment_hk,
experiment_hk,
library_hk,
load_datetime,
record_source
from
transformed

)

select * from final
51 changes: 51 additions & 0 deletions orcavault/models/raw/link_library_internal_subject.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
with source as (

select library_id, subject_id as internal_subject_id from {{ source('ods', 'data_portal_labmetadata') }}
union
select library_id, subject_id as internal_subject_id from {{ source('ods', 'data_portal_limsrow') }}
union
select lib.library_id as library_id, idv.individual_id as internal_subject_id from {{ source('ods', 'metadata_manager_library') }} as lib
join {{ source('ods', 'metadata_manager_subject') }} as sbj on sbj.orcabus_id = lib.subject_orcabus_id
join {{ source('ods', 'metadata_manager_subjectindividuallink') }} as lnk on lnk.subject_orcabus_id = sbj.orcabus_id
join {{ source('ods', 'metadata_manager_individual') }} as idv on idv.orcabus_id = lnk.individual_orcabus_id

),

cleaned as (

select
distinct library_id, internal_subject_id
from
source
where
(library_id is not null and library_id <> '') and
(internal_subject_id is not null and internal_subject_id <> '')

),

transformed as (

select
encode(sha256(cast(internal_subject_id as bytea)), 'hex') as internal_subject_hk,
encode(sha256(cast(library_id as bytea)), 'hex') as library_hk,
cast('{{ run_started_at }}' as timestamptz) as load_datetime,
(select 'lab') as record_source
from
cleaned

),

final as (

select
encode(sha256(concat(internal_subject_hk, library_hk)::bytea), 'hex') as library_internal_subject_hk,
internal_subject_hk,
library_hk,
load_datetime,
record_source
from
transformed

)

select * from final
50 changes: 50 additions & 0 deletions orcavault/models/raw/link_library_project.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
with source as (

select library_id, project_name as project_id from {{ source('ods', 'data_portal_labmetadata') }}
union
select library_id, project_name as project_id from {{ source('ods', 'data_portal_limsrow') }}
union
select lib.library_id as library_id, prj.project_id as project_id from {{ source('ods', 'metadata_manager_library') }} as lib
join {{ source('ods', 'metadata_manager_libraryprojectlink') }} as lnk on lnk.library_orcabus_id = lib.orcabus_id
join {{ source('ods', 'metadata_manager_project') }} as prj on lnk.project_orcabus_id = prj.orcabus_id

),

cleaned as (

select
distinct library_id, project_id
from
source
where
(library_id is not null and library_id <> '') and
(project_id is not null and project_id <> '')

),

transformed as (

select
encode(sha256(cast(project_id as bytea)), 'hex') as project_hk,
encode(sha256(cast(library_id as bytea)), 'hex') as library_hk,
cast('{{ run_started_at }}' as timestamptz) as load_datetime,
(select 'lab') as record_source
from
cleaned

),

final as (

select
encode(sha256(concat(project_hk, library_hk)::bytea), 'hex') as library_project_hk,
project_hk,
library_hk,
load_datetime,
record_source
from
transformed

)

select * from final
49 changes: 49 additions & 0 deletions orcavault/models/raw/link_library_sample.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
with source as (

select library_id, sample_id from {{ source('ods', 'data_portal_limsrow') }}
union
select library_id, sample_id from {{ source('ods', 'data_portal_labmetadata') }}
union
select library_id, smp.sample_id as sample_id from {{ source('ods', 'metadata_manager_library') }} as lib
join {{ source('ods', 'metadata_manager_sample') }} as smp on lib.sample_orcabus_id = smp.orcabus_id

),

cleaned as (

select
distinct library_id, sample_id
from
source
where
(library_id is not null and library_id <> '') and
(sample_id is not null and sample_id <> '')

),

transformed as (

select
encode(sha256(cast(sample_id as bytea)), 'hex') as sample_hk,
encode(sha256(cast(library_id as bytea)), 'hex') as library_hk,
cast('{{ run_started_at }}' as timestamptz) as load_datetime,
(select 'lab') as record_source
from
cleaned

),

final as (

select
encode(sha256(concat(sample_hk, library_hk)::bytea), 'hex') as library_sample_hk,
sample_hk,
library_hk,
load_datetime,
record_source
from
transformed

)

select * from final
46 changes: 46 additions & 0 deletions orcavault/models/raw/link_library_sequencing_run.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
with source as (

select library_id, instrument_run_id as sequencing_run_id from {{ source('ods', 'data_portal_libraryrun') }}
union
select library_id, illumina_id as sequencing_run_id from {{ source('ods', 'data_portal_limsrow') }}

),

cleaned as (

select
distinct library_id, sequencing_run_id
from
source
where
(library_id is not null and library_id <> '') and
(sequencing_run_id is not null and sequencing_run_id <> '')

),

transformed as (

select
encode(sha256(cast(sequencing_run_id as bytea)), 'hex') as sequencing_run_hk,
encode(sha256(cast(library_id as bytea)), 'hex') as library_hk,
cast('{{ run_started_at }}' as timestamptz) as load_datetime,
(select 'lab') as record_source
from
cleaned

),

final as (

select
encode(sha256(concat(sequencing_run_hk, library_hk)::bytea), 'hex') as library_sequencing_run_hk,
sequencing_run_hk,
library_hk,
load_datetime,
record_source
from
transformed

)

select * from final
Loading
Loading