From c92c63ede4ceff7fb0421690c0a395d90b3e6fe7 Mon Sep 17 00:00:00 2001 From: Lint Action Date: Fri, 13 Oct 2023 22:22:18 +0000 Subject: [PATCH] =?UTF-8?q?style:=20=F0=9F=8E=A8=20fix=20code=20style=20is?= =?UTF-8?q?sues=20with=20Black?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/etl/etl-test.py | 3324 ++++++++++---------- modules/etl/transforms/module_transform.py | 499 +-- modules/etl/transforms/redcap_transform.py | 107 +- 3 files changed, 2039 insertions(+), 1891 deletions(-) diff --git a/modules/etl/etl-test.py b/modules/etl/etl-test.py index ef09bbef..94fb4987 100644 --- a/modules/etl/etl-test.py +++ b/modules/etl/etl-test.py @@ -2,1652 +2,1718 @@ import numpy as np if __name__ == "__main__": + # Value assigned to missing values unless other specific value defined on function call + # (e.g. REDCapTransform.map_missing_values_by_columns(df, columns, new_missing_value)) + missing_value_generic = "Value Unavailable" - # Value assigned to missing values unless other specific value defined on function call - # (e.g. REDCapTransform.map_missing_values_by_columns(df, columns, new_missing_value)) - missing_value_generic = "Value Unavailable" + # Utility Column Groups + index_columns = [ + "record_id", + ] - # Utility Column Groups - index_columns = [ - "record_id", - ] + # Data Column Groups + data_columns = [ + "studyid", + "siteid", + "dm", + "genderid", + "scrsex", + "race", + "race2", + "ethnic", + "dvenvyn", + "dvenvstdat", + "dvenvcrcid", + "dvcgmyn", + "dvcgmstdat", + "dvcgmvrfy", + "dvamwyn", + "dvamwstdat", + "dvamwsn", + "dvrtmthd", + "dvrtnyn", + "dvrtnship", + "mhterm_dm1", + "mhterm_dm2", + "mhterm_predm", + "mh_dm_age", + "mh_a1c", + "cmtrt_a1c", + "cmtrt_insln", + "cmtrt_glcs", + "cmtrt_lfst", + "scrcmpdat", + ] - # Data Column Groups - data_columns = [ - "studyid", - "siteid", - "dm", - "genderid", - "scrsex", - "race", - "race2", - "ethnic", - "dvenvyn", - "dvenvstdat", - "dvenvcrcid", - "dvcgmyn", - "dvcgmstdat", - "dvcgmvrfy", - "dvamwyn", - "dvamwstdat", - "dvamwsn", - "dvrtmthd", - "dvrtnyn", - "dvrtnship", - "mhterm_dm1", - "mhterm_dm2", - "mhterm_predm", - "mh_dm_age", - "mh_a1c", - "cmtrt_a1c", - "cmtrt_insln", - "cmtrt_glcs", - "cmtrt_lfst", - "scrcmpdat", - ] + # Survey Column Groups + survey_columns = [ + "screening_survey_complete", + "study_enrollment_complete", + "recruitment_survey_complete", + "faq_survey_complete", + "recruitment_survey_management_complete", + "device_distribution_complete", + "preconsent_survey_complete", + "consent_survey_complete", + "staff_consent_attestation_survey_complete", + "demographics_survey_complete", + "health_survey_complete", + "substance_use_survey_complete", + "cesd10_survey_complete", + "paid5_dm_survey_complete", + "diabetes_survey_complete", + "dietary_survey_complete", + "ophthalmic_survey_complete", + "px_sdoh_combined_survey_complete", + "px_food_insecurity_survey_complete", + "px_neighborhood_environment_survey_complete", + "px_racial_ethnic_discrimination_survey_complete", + "decline_participation_survey_complete", + "meds_assessment_complete", + "driving_record_complete", + "physical_assessment_complete", + "bcva_complete", + "photopic_mars_complete", + "mesopic_mars_complete", + "monofilament_complete", + "moca_complete", + "ecg_complete", + "retinal_imaging_v2_complete", + "lab_results_complete", + "device_return_complete", + "specimen_management_complete", + "disposition_complete", + "data_management_complete", + ] - # Survey Column Groups - survey_columns = [ - "screening_survey_complete", - "study_enrollment_complete", - "recruitment_survey_complete", - "faq_survey_complete", - "recruitment_survey_management_complete", - "device_distribution_complete", - "preconsent_survey_complete", - "consent_survey_complete", - "staff_consent_attestation_survey_complete", - "demographics_survey_complete", - "health_survey_complete", - "substance_use_survey_complete", - "cesd10_survey_complete", - "paid5_dm_survey_complete", - "diabetes_survey_complete", - "dietary_survey_complete", - "ophthalmic_survey_complete", - "px_sdoh_combined_survey_complete", - "px_food_insecurity_survey_complete", - "px_neighborhood_environment_survey_complete", - "px_racial_ethnic_discrimination_survey_complete", - "decline_participation_survey_complete", - "meds_assessment_complete", - "driving_record_complete", - "physical_assessment_complete", - "bcva_complete", - "photopic_mars_complete", - "mesopic_mars_complete", - "monofilament_complete", - "moca_complete", - "ecg_complete", - "retinal_imaging_v2_complete", - "lab_results_complete", - "device_return_complete", - "specimen_management_complete", - "disposition_complete", - "data_management_complete", - ] + # Repeat Survey Column Groups + repeat_survey_columns = [ + "current_medications_complete", + ] - # Repeat Survey Column Groups - repeat_survey_columns = [ - "current_medications_complete", - ] + repeat_survey_data_columns = ["current_medications_complete", "current_medications"] - repeat_survey_data_columns = [ - "current_medications_complete", - "current_medications" - ] + # + # Value Maps + # - # - # Value Maps - # + survey_instrument_map = { + "2": "Complete", + "1": "Unverified", + "0": "Incomplete", + } - survey_instrument_map = { - "2": "Complete", - "1": "Unverified", - "0": "Incomplete", - } + # + # REDCap Transform Config + # - # - # REDCap Transform Config - # + redcapTransformConfig = { + "redcap_api_url": "https://redcap.iths.org/api/", + "redcap_api_key": "5508FE11E75105E0DB976205AA27DDA3", + "reports": [ + ( + "dashboard_data_generic", + {"report_id": 242544}, + [ + ("remap_values_by_columns", {"columns": data_columns}), + ("map_missing_values_by_columns", {"columns": data_columns}), + ("keep_columns", {"columns": index_columns + data_columns}), + ], + ), + ( + "dashboard_data_study_waypoints", + {"report_id": 251954}, + [ + ( + "remap_values_by_columns", + {"columns": survey_columns, "value_map": survey_instrument_map}, + ), + ("map_missing_values_by_columns", {"columns": survey_columns}), + ("keep_columns", {"columns": index_columns + survey_columns}), + ], + ), + ( + "dashboard_data_repeat_instruments", + {"report_id": 259920}, + [ + ("drop_rows", {"columns": repeat_survey_columns}), + ( + "aggregate_repeat_instrument_column_by_index", + {"aggregator": np.max, "dtype": str}, + ), + ( + "keep_columns", + {"columns": index_columns + repeat_survey_data_columns}, + ), + ], + ), + ], + "merge_transformed_reports": ( + "dashboard_data_generic", + [ + ( + "dashboard_data_study_waypoints", + {"on": index_columns, "how": "inner"}, + ), + ( + "dashboard_data_repeat_instruments", + {"on": index_columns, "how": "outer"}, + ), + ], + ), + "post_merge_transforms": [ + ( + "remap_values_by_columns", + {"columns": repeat_survey_columns, "value_map": survey_instrument_map}, + ), + ("map_missing_values_by_columns", {"columns": repeat_survey_data_columns}), + ], + "index_columns": ["record_id"], + "missing_value_generic": missing_value_generic, + } - redcapTransformConfig = { - "redcap_api_url": "https://redcap.iths.org/api/", - "redcap_api_key": "5508FE11E75105E0DB976205AA27DDA3", - "reports": [ - ("dashboard_data_generic", {"report_id": 242544}, [ - ("remap_values_by_columns", { - "columns": data_columns - }), - ("map_missing_values_by_columns", { - "columns": data_columns - }), - ("keep_columns", { - "columns": index_columns + data_columns - }) - ]), - ("dashboard_data_study_waypoints", {"report_id": 251954}, [ - ("remap_values_by_columns", { - "columns": survey_columns, - "value_map": survey_instrument_map - }), - ("map_missing_values_by_columns", { - "columns": survey_columns - }), - ("keep_columns", { - "columns": index_columns + survey_columns - }) - ]), - ("dashboard_data_repeat_instruments", {"report_id": 259920}, [ - ("drop_rows", { - "columns": repeat_survey_columns - }), - ("aggregate_repeat_instrument_column_by_index", { - "aggregator": np.max, - "dtype": str - }), - ("keep_columns", { - "columns": index_columns + repeat_survey_data_columns - }) - ]) - ], - "merge_transformed_reports" : ("dashboard_data_generic", [ - ("dashboard_data_study_waypoints", {"on": index_columns, "how": "inner"}), - ("dashboard_data_repeat_instruments", {"on": index_columns, "how": "outer"}) - ]), - "post_merge_transforms": [ - ("remap_values_by_columns", { - "columns": repeat_survey_columns, - "value_map": survey_instrument_map - }), - ("map_missing_values_by_columns", { - "columns": repeat_survey_data_columns - }) - ], - "index_columns": ["record_id"], - "missing_value_generic": missing_value_generic - } + # + # Visualization Transforms + # -# -# Visualization Transforms -# + # Sex & Gender Counts by Site + sexGenderTransform = ( + "simpleTransform", + { + "key": "sex-and-gender-transform", + "strict": True, + "transforms": { + "name": "Sex & Gender", + "vtype": "DoubleCategorical", + "method": { + "groups": ["siteid", "scrsex", "genderid"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + }, + "group": { + "name": "Sex", + "field": "scrsex", + "missing_value": missing_value_generic, + "astype": str, + }, + "subgroup": { + "name": "Gender", + "field": "genderid", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Gender", + "field": "genderid", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + }, + ) - # Sex & Gender Counts by Site - sexGenderTransform = ("simpleTransform", { - "key": "sex-and-gender-transform", - "strict": True, - "transforms": { - "name": "Sex & Gender", - "vtype": "DoubleCategorical", - "method": { - "groups": ["siteid", "scrsex", "genderid"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic - }, - "group": { - "name": "Sex", - "field": "scrsex", - "missing_value": missing_value_generic, - "astype": str - }, - "subgroup": { - "name": "Gender", - "field": "genderid", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Gender", - "field": "genderid", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - } - }) + # Race & Ethnicity Counts by Site + raceEthnicityTransform = ( + "simpleTransform", + { + "key": "race-and-ethnicity-transform", + "strict": True, + "transforms": { + "name": "Race & Ethnicity", + "vtype": "DoubleCategorical", + "method": { + "groups": ["siteid", "race", "ethnic"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Race", + "field": "race", + "missing_value": missing_value_generic, + "astype": str, + }, + "subgroup": { + "name": "Ethnicity", + "field": "ethnic", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Ethnicity", + "field": "ethnic", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + }, + ) - # Race & Ethnicity Counts by Site - raceEthnicityTransform = ("simpleTransform", { - "key": "race-and-ethnicity-transform", - "strict": True, - "transforms": { - "name": "Race & Ethnicity", - "vtype": "DoubleCategorical", - "method": { - "groups": ["siteid", "race", "ethnic"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Race", - "field": "race", - "missing_value": missing_value_generic, - "astype": str - }, - "subgroup": { - "name": "Ethnicity", - "field": "ethnic", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Ethnicity", - "field": "ethnic", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - } - }) + # Phenotypes + phenotypeTransform = ( + "simpleTransform", + { + "key": "phenotype-transform", + "strict": True, + "transforms": { + "name": "Type II Diabetes", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "mhterm_dm2"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Phenotype", + "field": "mhterm_dm2", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Phenotype", + "field": "mhterm_dm2", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + }, + ) - # Phenotypes - phenotypeTransform = ("simpleTransform", { - "key": "phenotype-transform", - "strict": True, - "transforms": { - "name": "Type II Diabetes", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "mhterm_dm2"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Phenotype", - "field": "mhterm_dm2", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Phenotype", - "field": "mhterm_dm2", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - } - }) + # Study Waypoints + studyWaypointsTransform = ( + "compoundTransform", + { + "key": "study-waypoints-transform", + "strict": True, + "transforms": [ + { + "name": "Recruitment Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "recruitment_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Recruitment Survey", + "field": "recruitment_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Recruitment Survey", + "field": "recruitment_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "FAQ Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "faq_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "FAQ Survey", + "field": "faq_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "FAQ Survey", + "field": "faq_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Screening Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "screening_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Screening Survey", + "field": "screening_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Screening Survey", + "field": "screening_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Preconsent Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "preconsent_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Preconsent Survey", + "field": "preconsent_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Preconsent Survey", + "field": "preconsent_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Consent Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "consent_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Consent Survey", + "field": "consent_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Consent Survey", + "field": "consent_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Staff Consent Attestation Survey", + "vtype": "SingleCategorical", + "method": { + "groups": [ + "siteid", + "staff_consent_attestation_survey_complete", + ], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Staff Consent Attestation Survey", + "field": "staff_consent_attestation_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Staff Consent Attestation Survey", + "field": "staff_consent_attestation_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Demographics Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "demographics_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Demographics Survey", + "field": "demographics_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Demographics Survey", + "field": "demographics_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Health Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "health_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Health Survey", + "field": "health_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Health Survey", + "field": "health_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Substance Use Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "substance_use_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Substance Use Survey", + "field": "substance_use_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Substance Use Survey", + "field": "substance_use_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "CES-D-10 Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "cesd10_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "CES-D-10 Survey", + "field": "cesd10_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "CES-D-10 Survey", + "field": "cesd10_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "PAID-5 DM Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "paid5_dm_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "PAID-5 DM Survey", + "field": "paid5_dm_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "PAID-5 DM Survey", + "field": "paid5_dm_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Diabetes Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "diabetes_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Diabetes Survey", + "field": "diabetes_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Diabetes Survey", + "field": "diabetes_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Dietary Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "dietary_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Dietary Survey", + "field": "dietary_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Dietary Survey", + "field": "dietary_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Opthalmic Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "ophthalmic_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Opthalmic Survey", + "field": "ophthalmic_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Opthalmic Survey", + "field": "ophthalmic_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "PhenX SDOH Combined Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "px_sdoh_combined_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "PhenX SDOH Combined Survey", + "field": "px_sdoh_combined_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "PhenX SDOH Combined Survey", + "field": "px_sdoh_combined_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "PhenX Food Insecurity Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "px_food_insecurity_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "PhenX Food Insecurity Survey", + "field": "px_food_insecurity_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "PhenX Food Insecurity Survey", + "field": "px_food_insecurity_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "PhenX Neighborhood Environment Survey", + "vtype": "SingleCategorical", + "method": { + "groups": [ + "siteid", + "px_neighborhood_environment_survey_complete", + ], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "PhenX Neighborhood Environment Survey", + "field": "px_neighborhood_environment_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "PhenX Neighborhood Environment Survey", + "field": "px_neighborhood_environment_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "PhenX Racial and Ethnic Discrimination Survey", + "vtype": "SingleCategorical", + "method": { + "groups": [ + "siteid", + "px_racial_ethnic_discrimination_survey_complete", + ], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "PhenX Racial and Ethnic Discrimination Survey", + "field": "px_racial_ethnic_discrimination_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "PhenX Racial and Ethnic Discrimination Survey", + "field": "px_racial_ethnic_discrimination_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Decline Participation Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "decline_participation_survey_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Decline Participation Survey", + "field": "decline_participation_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Decline Participation Survey", + "field": "decline_participation_survey_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Study Enrollment Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "study_enrollment_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Study Enrollment Survey", + "field": "study_enrollment_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Study Enrollment Survey", + "field": "study_enrollment_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Driving Record", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "driving_record_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Driving Record", + "field": "driving_record_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Driving Record", + "field": "driving_record_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Device Distribution", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "device_distribution_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Device Distribution", + "field": "device_distribution_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Device Distribution", + "field": "device_distribution_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Medications Assessment", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "meds_assessment_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Medications Assessment", + "field": "meds_assessment_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Medications Assessment", + "field": "meds_assessment_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Physical Assessment", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "physical_assessment_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Physical Assessment", + "field": "physical_assessment_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Physical Assessment", + "field": "physical_assessment_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "BCVA", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "bcva_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "BCVA", + "field": "bcva_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "BCVA", + "field": "bcva_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Photopic MARS", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "photopic_mars_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Photopic MARS", + "field": "photopic_mars_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Photopic MARS", + "field": "photopic_mars_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Mesopic MARS", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "mesopic_mars_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Mesopic MARS", + "field": "mesopic_mars_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Mesopic MARS", + "field": "mesopic_mars_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Monofilament", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "monofilament_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Monofilament", + "field": "monofilament_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Monofilament", + "field": "monofilament_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "MOCA", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "moca_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "MOCA", + "field": "moca_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "MOCA", + "field": "moca_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "ECG Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "ecg_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "ECG Survey", + "field": "ecg_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "ECG Survey", + "field": "ecg_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Lab Results Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "lab_results_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Lab Results Survey", + "field": "lab_results_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Lab Results Survey", + "field": "lab_results_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Specimen Management", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "specimen_management_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Specimen Management", + "field": "specimen_management_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Specimen Management", + "field": "specimen_management_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Device Return", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "device_return_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Device Return", + "field": "device_return_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Device Return", + "field": "device_return_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Disposition Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "disposition_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Disposition Survey", + "field": "disposition_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Disposition Survey", + "field": "disposition_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Data Management Survey", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "data_management_complete"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Data Management Survey", + "field": "data_management_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Data Management Survey", + "field": "data_management_complete", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + ], + }, + ) - # Study Waypoints - studyWaypointsTransform = ("compoundTransform", { - "key": "study-waypoints-transform", - "strict": True, - "transforms": [{ - "name": "Recruitment Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "recruitment_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Recruitment Survey", - "field": "recruitment_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Recruitment Survey", - "field": "recruitment_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "FAQ Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "faq_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "FAQ Survey", - "field": "faq_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "FAQ Survey", - "field": "faq_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Screening Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "screening_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Screening Survey", - "field": "screening_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Screening Survey", - "field": "screening_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Preconsent Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "preconsent_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Preconsent Survey", - "field": "preconsent_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Preconsent Survey", - "field": "preconsent_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Consent Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "consent_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Consent Survey", - "field": "consent_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Consent Survey", - "field": "consent_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Staff Consent Attestation Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "staff_consent_attestation_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Staff Consent Attestation Survey", - "field": "staff_consent_attestation_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Staff Consent Attestation Survey", - "field": "staff_consent_attestation_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Demographics Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "demographics_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Demographics Survey", - "field": "demographics_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Demographics Survey", - "field": "demographics_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Health Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "health_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Health Survey", - "field": "health_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Health Survey", - "field": "health_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Substance Use Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "substance_use_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Substance Use Survey", - "field": "substance_use_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Substance Use Survey", - "field": "substance_use_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "CES-D-10 Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "cesd10_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "CES-D-10 Survey", - "field": "cesd10_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "CES-D-10 Survey", - "field": "cesd10_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "PAID-5 DM Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "paid5_dm_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "PAID-5 DM Survey", - "field": "paid5_dm_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "PAID-5 DM Survey", - "field": "paid5_dm_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Diabetes Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "diabetes_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Diabetes Survey", - "field": "diabetes_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Diabetes Survey", - "field": "diabetes_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Dietary Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "dietary_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Dietary Survey", - "field": "dietary_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Dietary Survey", - "field": "dietary_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Opthalmic Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "ophthalmic_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Opthalmic Survey", - "field": "ophthalmic_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Opthalmic Survey", - "field": "ophthalmic_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "PhenX SDOH Combined Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "px_sdoh_combined_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "PhenX SDOH Combined Survey", - "field": "px_sdoh_combined_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "PhenX SDOH Combined Survey", - "field": "px_sdoh_combined_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "PhenX Food Insecurity Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "px_food_insecurity_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "PhenX Food Insecurity Survey", - "field": "px_food_insecurity_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "PhenX Food Insecurity Survey", - "field": "px_food_insecurity_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "PhenX Neighborhood Environment Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "px_neighborhood_environment_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "PhenX Neighborhood Environment Survey", - "field": "px_neighborhood_environment_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "PhenX Neighborhood Environment Survey", - "field": "px_neighborhood_environment_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "PhenX Racial and Ethnic Discrimination Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "px_racial_ethnic_discrimination_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "PhenX Racial and Ethnic Discrimination Survey", - "field": "px_racial_ethnic_discrimination_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "PhenX Racial and Ethnic Discrimination Survey", - "field": "px_racial_ethnic_discrimination_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Decline Participation Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "decline_participation_survey_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Decline Participation Survey", - "field": "decline_participation_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Decline Participation Survey", - "field": "decline_participation_survey_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Study Enrollment Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "study_enrollment_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Study Enrollment Survey", - "field": "study_enrollment_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Study Enrollment Survey", - "field": "study_enrollment_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Driving Record", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "driving_record_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Driving Record", - "field": "driving_record_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Driving Record", - "field": "driving_record_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Device Distribution", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "device_distribution_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Device Distribution", - "field": "device_distribution_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Device Distribution", - "field": "device_distribution_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Medications Assessment", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "meds_assessment_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Medications Assessment", - "field": "meds_assessment_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Medications Assessment", - "field": "meds_assessment_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Physical Assessment", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "physical_assessment_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Physical Assessment", - "field": "physical_assessment_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Physical Assessment", - "field": "physical_assessment_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "BCVA", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "bcva_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "BCVA", - "field": "bcva_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "BCVA", - "field": "bcva_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Photopic MARS", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "photopic_mars_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Photopic MARS", - "field": "photopic_mars_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Photopic MARS", - "field": "photopic_mars_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Mesopic MARS", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "mesopic_mars_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Mesopic MARS", - "field": "mesopic_mars_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Mesopic MARS", - "field": "mesopic_mars_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Monofilament", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "monofilament_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Monofilament", - "field": "monofilament_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Monofilament", - "field": "monofilament_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "MOCA", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "moca_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "MOCA", - "field": "moca_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "MOCA", - "field": "moca_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "ECG Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "ecg_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "ECG Survey", - "field": "ecg_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "ECG Survey", - "field": "ecg_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Lab Results Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "lab_results_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Lab Results Survey", - "field": "lab_results_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Lab Results Survey", - "field": "lab_results_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Specimen Management", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "specimen_management_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Specimen Management", - "field": "specimen_management_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Specimen Management", - "field": "specimen_management_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Device Return", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "device_return_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Device Return", - "field": "device_return_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Device Return", - "field": "device_return_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Disposition Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "disposition_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Disposition Survey", - "field": "disposition_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Disposition Survey", - "field": "disposition_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Data Management Survey", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "data_management_complete"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Data Management Survey", - "field": "data_management_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Data Management Survey", - "field": "data_management_complete", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }] - }) + # Sex & Gender Counts by Site + mixedTransform = ( + "mixedTransform", + { + "key": "mixed-transform-test", + "strict": True, + "transforms": [ + { + "name": "Sex & Gender", + "vtype": "DoubleCategorical", + "method": { + "groups": ["siteid", "scrsex", "genderid"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + }, + "group": { + "name": "Sex", + "field": "scrsex", + "missing_value": missing_value_generic, + "astype": str, + }, + "subgroup": { + "name": "Gender", + "field": "genderid", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Gender", + "field": "genderid", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Race & Ethnicity", + "vtype": "DoubleCategorical", + "method": { + "groups": ["siteid", "race", "ethnic"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Race", + "field": "race", + "missing_value": missing_value_generic, + "astype": str, + }, + "subgroup": { + "name": "Ethnicity", + "field": "ethnic", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Ethnicity", + "field": "ethnic", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + { + "name": "Type II Diabetes", + "vtype": "SingleCategorical", + "method": { + "groups": ["siteid", "mhterm_dm2"], + "value": "record_id", + "func": "count", + }, + "accessors": { + "filterby": { + "name": "Site", + "field": "siteid", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Phenotype", + "field": "mhterm_dm2", + "missing_value": missing_value_generic, + "astype": str, + }, + "color": { + "name": "Phenotype", + "field": "mhterm_dm2", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + ], + }, + ) - # Sex & Gender Counts by Site - mixedTransform = ("mixedTransform", { - "key": "mixed-transform-test", - "strict": True, - "transforms": [{ - "name": "Sex & Gender", - "vtype": "DoubleCategorical", - "method": { - "groups": ["siteid", "scrsex", "genderid"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic - }, - "group": { - "name": "Sex", - "field": "scrsex", - "missing_value": missing_value_generic, - "astype": str - }, - "subgroup": { - "name": "Gender", - "field": "genderid", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Gender", - "field": "genderid", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Race & Ethnicity", - "vtype": "DoubleCategorical", - "method": { - "groups": ["siteid", "race", "ethnic"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Race", - "field": "race", - "missing_value": missing_value_generic, - "astype": str - }, - "subgroup": { - "name": "Ethnicity", - "field": "ethnic", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Ethnicity", - "field": "ethnic", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }, { - "name": "Type II Diabetes", - "vtype": "SingleCategorical", - "method": { - "groups": ["siteid", "mhterm_dm2"], - "value": "record_id", - "func": "count" - }, - "accessors": { - "filterby": { - "name": "Site", - "field": "siteid", - "missing_value": missing_value_generic, - "astype": str - }, - "group": { - "name": "Phenotype", - "field": "mhterm_dm2", - "missing_value": missing_value_generic, - "astype": str - }, - "color": { - "name": "Phenotype", - "field": "mhterm_dm2", - "missing_value": missing_value_generic, - "astype": str - }, - "value": { - "name": "Count (N)", - "field": "record_id", - "missing_value": missing_value_generic, - "astype": int - } - } - }] - }) + extract = REDCapTransform(config=redcapTransformConfig).merged - extract = REDCapTransform( - config = redcapTransformConfig - ).merged + extract.to_csv("merged-transform.tsv", sep="\t") - extract.to_csv("merged-transform.tsv", sep = "\t") + transforms = [ + sexGenderTransform, + raceEthnicityTransform, + phenotypeTransform, + studyWaypointsTransform, + mixedTransform, + ] - transforms = [ - sexGenderTransform, - raceEthnicityTransform, - phenotypeTransform, - studyWaypointsTransform, - mixedTransform - ] - - # Print - for module_method, config in transforms: - transformer = getattr(ModuleTransform(config), module_method)(extract) - if type(transformer.transformed) == list: - for record in transformer.transformed: - print(record) - print("\n") - if type(transformer.transformed) == dict: - for key, transform in transformer.transformed.items(): - print(key) - for record in transform: - print(record) + # Print + for module_method, config in transforms: + transformer = getattr(ModuleTransform(config), module_method)(extract) + if type(transformer.transformed) == list: + for record in transformer.transformed: + print(record) + print("\n") + if type(transformer.transformed) == dict: + for key, transform in transformer.transformed.items(): + print(key) + for record in transform: + print(record) else: - pass + pass diff --git a/modules/etl/transforms/module_transform.py b/modules/etl/transforms/module_transform.py index 06a9e203..f65a9b78 100644 --- a/modules/etl/transforms/module_transform.py +++ b/modules/etl/transforms/module_transform.py @@ -3,235 +3,286 @@ from datetime import datetime import logging, re import vtypes + # Third-Party Modules import pandas as pd -class ModuleTransform (object): - def __init__(self: object, config: Dict[str, Dict[str, Any]], logging_config: Dict[str, str] = {}) -> None: - - # - # Logging - # - - # Logging Config Checks - self.logging_config = {} - self.logging_config["encoding"] = logging_config["encoding"] \ - if "encoding" in logging_config \ - else "utf-8" - self.logging_config["filename"] = logging_config["filename"] \ - if "filename" in logging_config \ - else "REDCapETL.log" - self.logging_config["level"] = getattr(logging, logging_config["level"].upper) \ - if "level" in logging_config \ - else logging.DEBUG - - # Configure Logging - logging.basicConfig(**self.logging_config) - self.logger = logging.getLogger("VizModTransform") - - # - # References - # - - self.valid = True - - # - # Visualization Variables - # - - # Flag Indicating Whether to Use Strict Typing on Vtype Mapping - self.strict = config["strict"] \ - if "strict" in config \ - else True - - self.key = config["key"] \ - if "key" in config \ - else None - - self.transforms = config["transforms"] \ - if "transforms" in config \ - else None - - if self.transforms is None: - self.valid = False - raise ValueError(f"ModuleTransform instantiation missing transforms argument") - - # Normalize Transforms List Type, Check Validity, and Warn on Missing Attributes - self.transformList = self.transforms if (type(self.transforms) == list) else [self.transforms] - for transform in enumerate(self.transformList): - self.valid = True if self._transformIsValid(transform) else False - if (self.strict and not self.valid): - raise ValueError(f"{self.key}:Missing properties in transforms argument, see log at {self.logging_config['filename']} for details") - - self.logger.info(f"{self.key}:Initialized") - - return - - def _transformIsValid (self: object, transform: Tuple[int, Dict[str, Any]]) -> bool: - """ - Transform validator - """ - index, transform = transform - valid = True - if "name" not in transform: - self.logger.error(f"{self.key}:Transform at index {index} in transforms list missing name property") - valid = False - if "vtype" not in transform: - self.logger.error(f"{self.key}:Transform at index {index} in transforms list missing vtype property") - valid = False - if "method" not in transform: - self.logger.error(f"{self.key}:Transform at index {index} in transforms list missing method property") - valid = False - if "accessors" not in transform: - self.logger.error(f"{self.key}:Transform at index {index} in transforms list missing accessors property") - valid = False - return valid - - - - def _setValueType (self: object, vtype: Any, record: Dict[str, Any], key: str, accessor: Dict[str, Dict[str, str|Callable]]) -> Any: - """ - Element-wise type setting method. If value of - element is not the missing value, we cast the - value as the type defined for property in the - vtype. - """ - for pname, _ptype in vtype.props: - if pname == key: - # Accessor Typing - ptype = _ptype \ - if "astype" not in accessor \ - else accessor["astype"] - if ptype != _ptype: - self.logger.warning(f"Accessor `{pname}` with type `{ptype}` conflicts with VType definition requiring {_ptype}") - if self.strict: - raise ValueError(f"Accessor `{pname}` with type `{ptype}` conflicts with VType definition requiring {_ptype}") - # Accessor Name - pvalue = record[accessor["field"]] - if pvalue != accessor["missing_value"]: - try: - pvalue = ptype(pvalue) - except (RuntimeError, TypeError) as error: - if self.strict: - self.logger.warning(f"Unable to cast value {record[key]} to {ptype}") - raise error + +class ModuleTransform(object): + def __init__( + self: object, + config: Dict[str, Dict[str, Any]], + logging_config: Dict[str, str] = {}, + ) -> None: + # + # Logging + # + + # Logging Config Checks + self.logging_config = {} + self.logging_config["encoding"] = ( + logging_config["encoding"] if "encoding" in logging_config else "utf-8" + ) + self.logging_config["filename"] = ( + logging_config["filename"] + if "filename" in logging_config + else "REDCapETL.log" + ) + self.logging_config["level"] = ( + getattr(logging, logging_config["level"].upper) + if "level" in logging_config + else logging.DEBUG + ) + + # Configure Logging + logging.basicConfig(**self.logging_config) + self.logger = logging.getLogger("VizModTransform") + + # + # References + # + + self.valid = True + + # + # Visualization Variables + # + + # Flag Indicating Whether to Use Strict Typing on Vtype Mapping + self.strict = config["strict"] if "strict" in config else True + + self.key = config["key"] if "key" in config else None + + self.transforms = config["transforms"] if "transforms" in config else None + + if self.transforms is None: + self.valid = False + raise ValueError( + f"ModuleTransform instantiation missing transforms argument" + ) + + # Normalize Transforms List Type, Check Validity, and Warn on Missing Attributes + self.transformList = ( + self.transforms if (type(self.transforms) == list) else [self.transforms] + ) + for transform in enumerate(self.transformList): + self.valid = True if self._transformIsValid(transform) else False + if self.strict and not self.valid: + raise ValueError( + f"{self.key}:Missing properties in transforms argument, see log at {self.logging_config['filename']} for details" + ) + + self.logger.info(f"{self.key}:Initialized") + + return + + def _transformIsValid(self: object, transform: Tuple[int, Dict[str, Any]]) -> bool: + """ + Transform validator + """ + index, transform = transform + valid = True + if "name" not in transform: + self.logger.error( + f"{self.key}:Transform at index {index} in transforms list missing name property" + ) + valid = False + if "vtype" not in transform: + self.logger.error( + f"{self.key}:Transform at index {index} in transforms list missing vtype property" + ) + valid = False + if "method" not in transform: + self.logger.error( + f"{self.key}:Transform at index {index} in transforms list missing method property" + ) + valid = False + if "accessors" not in transform: + self.logger.error( + f"{self.key}:Transform at index {index} in transforms list missing accessors property" + ) + valid = False + return valid + + def _setValueType( + self: object, + vtype: Any, + record: Dict[str, Any], + key: str, + accessor: Dict[str, Dict[str, str | Callable]], + ) -> Any: + """ + Element-wise type setting method. If value of + element is not the missing value, we cast the + value as the type defined for property in the + vtype. + """ + for pname, _ptype in vtype.props: + if pname == key: + # Accessor Typing + ptype = _ptype if "astype" not in accessor else accessor["astype"] + if ptype != _ptype: + self.logger.warning( + f"Accessor `{pname}` with type `{ptype}` conflicts with VType definition requiring {_ptype}" + ) + if self.strict: + raise ValueError( + f"Accessor `{pname}` with type `{ptype}` conflicts with VType definition requiring {_ptype}" + ) + # Accessor Name + pvalue = record[accessor["field"]] + if pvalue != accessor["missing_value"]: + try: + pvalue = ptype(pvalue) + except (RuntimeError, TypeError) as error: + if self.strict: + self.logger.warning( + f"Unable to cast value {record[key]} to {ptype}" + ) + raise error + else: + self.logger.warning( + f"Unable to cast value {record[key]} to {ptype}" + ) + continue + + return pvalue + + def simpleTransform(self: object, df: pd.DataFrame) -> object: + """ + Performs a pd.DataFrame.groupby transform. The + df is first subset to the relevant fields. A + groupby function is then applied to the subset + to create a multi-index (hierarchy) by the + groups. An aggregate function is then applied + to the non-grouped column (e.g. count, sum). + + One transform for one VType. + """ + transform = self.transformList.pop() + name, _vtype, method, accessors = ( + transform["name"], + transform["vtype"], + transform["method"], + transform["accessors"], + ) + vtype = getattr(vtypes, _vtype)() + + self.transformed = [] + if vtype.isvalid(df, accessors): + temp = df[ + list(set(accessor["field"] for key, accessor in accessors.items())) + ] + groups, value, func = method["groups"], method["value"], method["func"] + grouped = temp.groupby(groups, as_index=False) + transformed = getattr(grouped, func)() + + for record in transformed.to_dict("records"): + record = { + key: self._setValueType(vtype, record, key, accessor) + for key, accessor in accessors.items() + } + record = {"name": name} | record + self.transformed.append(record) + + else: + for error in vtype.validation_errors: + self.logger.warning(f"{error}") + + return self + + def compoundTransform(self: object, df: pd.DataFrame) -> object: + """ + For each transform, performs a pd.DataFrame.groupby + transform. The df is first subset to the relevant + fields. A groupby function is then applied to the + subset to create a multi-index (hierarchy) by the + groups. An aggregate function is then applied to the + non-grouped column (e.g. count, sum). + + All transforms are combined into a single flat + transform. Transforms must be identical VType, + e.g. [transformA, transformB, ...] + """ + self.transformed = [] + + for transform in self.transformList: + name, vtype, method, accessors = ( + transform["name"], + getattr(vtypes, transform["vtype"])(), + transform["method"], + transform["accessors"], + ) + if vtype.isvalid(df, accessors): + temp = df[ + list(set(accessor["field"] for key, accessor in accessors.items())) + ] + groups, value, func = method["groups"], method["value"], method["func"] + grouped = temp.groupby(groups, as_index=False) + transformed = getattr(grouped, func)() + + for record in transformed.to_dict("records"): + record = { + key: self._setValueType(vtype, record, key, accessor) + for key, accessor in accessors.items() + } + record = {"name": name} | record + self.transformed.append(record) + else: - self.logger.warning(f"Unable to cast value {record[key]} to {ptype}") - continue - - return pvalue - - def simpleTransform (self: object, df: pd.DataFrame) -> object: - """ - Performs a pd.DataFrame.groupby transform. The - df is first subset to the relevant fields. A - groupby function is then applied to the subset - to create a multi-index (hierarchy) by the - groups. An aggregate function is then applied - to the non-grouped column (e.g. count, sum). - - One transform for one VType. - """ - transform = self.transformList.pop() - name, _vtype, method, accessors = transform["name"], transform["vtype"], transform["method"], transform["accessors"] - vtype = getattr(vtypes, _vtype)() - - self.transformed = [] - if vtype.isvalid(df, accessors): - temp = df[list(set(accessor["field"] for key, accessor in accessors.items()))] - groups, value, func = method["groups"], method["value"], method["func"] - grouped = temp.groupby(groups, as_index = False) - transformed = getattr(grouped, func)() - - for record in transformed.to_dict("records"): - record = {key: self._setValueType(vtype, record, key, accessor) for key, accessor in accessors.items()} - record = {"name": name} | record - self.transformed.append(record) - - else: - - for error in vtype.validation_errors: - self.logger.warning(f"{error}") - - return self - - def compoundTransform (self: object, df: pd.DataFrame) -> object: - """ - For each transform, performs a pd.DataFrame.groupby - transform. The df is first subset to the relevant - fields. A groupby function is then applied to the - subset to create a multi-index (hierarchy) by the - groups. An aggregate function is then applied to the - non-grouped column (e.g. count, sum). - - All transforms are combined into a single flat - transform. Transforms must be identical VType, - e.g. [transformA, transformB, ...] - """ - self.transformed = [] - - for transform in self.transformList: - - name, vtype, method, accessors = transform["name"], getattr(vtypes, transform["vtype"])(), transform["method"], transform["accessors"] - if vtype.isvalid(df, accessors): - temp = df[list(set(accessor["field"] for key, accessor in accessors.items()))] - groups, value, func = method["groups"], method["value"], method["func"] - grouped = temp.groupby(groups, as_index = False) - transformed = getattr(grouped, func)() - - for record in transformed.to_dict("records"): - record = {key: self._setValueType(vtype, record, key, accessor) for key, accessor in accessors.items()} - record = {"name": name} | record - self.transformed.append(record) - - else: - - for error in vtype.validation_errors: - self.logger.warning(f"{error}") - - return self - - def mixedTransform (self: object, df: pd.DataFrame) -> object: - """ - For each transform, performs a pd.DataFrame.groupby - transform. The df is first subset to the relevant - fields. A groupby function is then applied to the - subset to create a multi-index (hierarchy) by the - groups. An aggregate function is then applied to the - non-grouped column (e.g. count, sum). - - Transforms are kept distinct inserted into a dictionary, - e.g. {nameA: transformA, nameB: transformB, ...}. - Transforms can be heterogenous VTypes. - """ - self.transformed = {} - - for transform in self.transformList: - - name, vtype, method, accessors = transform["name"], getattr(vtypes, transform["vtype"])(), transform["method"], transform["accessors"] - if vtype.isvalid(df, accessors): - temp = df[list(set(accessor["field"] for key, accessor in accessors.items()))] - groups, value, func = method["groups"], method["value"], method["func"] - grouped = temp.groupby(groups, as_index = False) - transformed = getattr(grouped, func)() - - subtransform = [] - for record in transformed.to_dict("records"): - record = {key: self._setValueType(vtype, record, key, accessor) for key, accessor in accessors.items()} - record = {"name": name} | record - subtransform.append(record) - self.transformed[name] = subtransform - - else: - - for error in vtype.validation_errors: - self.logger.warning(f"{error}") - - return self + for error in vtype.validation_errors: + self.logger.warning(f"{error}") + + return self + + def mixedTransform(self: object, df: pd.DataFrame) -> object: + """ + For each transform, performs a pd.DataFrame.groupby + transform. The df is first subset to the relevant + fields. A groupby function is then applied to the + subset to create a multi-index (hierarchy) by the + groups. An aggregate function is then applied to the + non-grouped column (e.g. count, sum). + + Transforms are kept distinct inserted into a dictionary, + e.g. {nameA: transformA, nameB: transformB, ...}. + Transforms can be heterogenous VTypes. + """ + self.transformed = {} + + for transform in self.transformList: + name, vtype, method, accessors = ( + transform["name"], + getattr(vtypes, transform["vtype"])(), + transform["method"], + transform["accessors"], + ) + if vtype.isvalid(df, accessors): + temp = df[ + list(set(accessor["field"] for key, accessor in accessors.items())) + ] + groups, value, func = method["groups"], method["value"], method["func"] + grouped = temp.groupby(groups, as_index=False) + transformed = getattr(grouped, func)() + + subtransform = [] + for record in transformed.to_dict("records"): + record = { + key: self._setValueType(vtype, record, key, accessor) + for key, accessor in accessors.items() + } + record = {"name": name} | record + subtransform.append(record) + self.transformed[name] = subtransform + + else: + for error in vtype.validation_errors: + self.logger.warning(f"{error}") + + return self if __name__ == "__main__": - pass + pass else: - pass - + pass diff --git a/modules/etl/transforms/redcap_transform.py b/modules/etl/transforms/redcap_transform.py index 667c7444..d5e39983 100644 --- a/modules/etl/transforms/redcap_transform.py +++ b/modules/etl/transforms/redcap_transform.py @@ -19,52 +19,54 @@ def __init__(self, config: dict) -> None: self.redcap_api_key = config["redcap_api_key"] # Set Transform Key - self.key = config["key"] \ - if "key" in config \ - else "redcap-transform" + self.key = config["key"] if "key" in config else "redcap-transform" # Data Config - self.index_columns = config["index_columns"] \ - if "index_columns" in config \ - else ["record_id"] + self.index_columns = ( + config["index_columns"] if "index_columns" in config else ["record_id"] + ) # REDCap Reports Config - self.reports_configs = config["reports"] \ - if "reports" in config \ - else [] + self.reports_configs = config["reports"] if "reports" in config else [] # Report Merging - self.merge_transformed_reports = config["merge_transformed_reports"] \ - if "merge_transformed_reports" in config \ + self.merge_transformed_reports = ( + config["merge_transformed_reports"] + if "merge_transformed_reports" in config else [] + ) # Post Merge Transforms - self.post_merge_transforms = config["post_merge_transforms"] \ - if "post_merge_transforms" in config \ - else [] + self.post_merge_transforms = ( + config["post_merge_transforms"] if "post_merge_transforms" in config else [] + ) # Column Value Separator - self.multivalue_separator = config["multivalue_separator"] \ - if "multivalue_separator" in config \ - else "|" + self.multivalue_separator = ( + config["multivalue_separator"] if "multivalue_separator" in config else "|" + ) # CSV Float Format (Default: "%.2f") - self.csv_float_format = config["csv_float_format"] \ - if "csv_float_format" in config \ - else "%.2f" + self.csv_float_format = ( + config["csv_float_format"] if "csv_float_format" in config else "%.2f" + ) - self.missing_value_generic = config["missing_value_generic"] \ - if "missing_value_generic" in config \ + self.missing_value_generic = ( + config["missing_value_generic"] + if "missing_value_generic" in config else "Value Unavailable" + ) # Logging Config - self.logging_config = config["logging_config"] \ - if "logging_config" in config \ + self.logging_config = ( + config["logging_config"] + if "logging_config" in config else { "encoding": "utf-8", "filename": "REDCapETL.log", "level": logging.DEBUG, } + ) # Configure Logging logging.basicConfig(**self.logging_config) @@ -94,13 +96,22 @@ def __init__(self, config: dict) -> None: self._field_rgx["calc"] = None # General Parsing Variables - self.none_values = [np.nan, pd.NaT, None, "nan", "NaN", "-", "", self.missing_value_generic] + self.none_values = [ + np.nan, + pd.NaT, + None, + "nan", + "NaN", + "-", + "", + self.missing_value_generic, + ] self.none_map = {key: self.missing_value_generic for key in self.none_values} self.survey_instrument_map = { "2": "Complete", "1": "Unverified", "0": "Incomplete", - "" : self.missing_value_generic + "": self.missing_value_generic, } self.logger.info(f"Initialized") @@ -451,9 +462,17 @@ def remap_values_by_columns( # Transform - Map Missing Values By Columns # - def _map_missing_values_by_columns (self: object, df: pd.DataFrame, columns: List[str], missing_value: Any = None, annotation: List[Dict[str, Any]] = []) -> pd.DataFrame: - columns = self._resolve_columns_with_dataframe(df = df, columns = columns) - missing_value = missing_value if missing_value is not None else self.missing_value_generic + def _map_missing_values_by_columns( + self: object, + df: pd.DataFrame, + columns: List[str], + missing_value: Any = None, + annotation: List[Dict[str, Any]] = [], + ) -> pd.DataFrame: + columns = self._resolve_columns_with_dataframe(df=df, columns=columns) + missing_value = ( + missing_value if missing_value is not None else self.missing_value_generic + ) for column in columns: for i, value in enumerate(df[column]): if (len(str(value)) == 0) or (value in self.none_map.keys()): @@ -464,12 +483,16 @@ def _map_missing_values_by_columns (self: object, df: pd.DataFrame, columns: Lis return df @classmethod - def map_missing_values_by_columns (self: object, df: pd.DataFrame, columns: List[str], missing_value: Any) -> pd.DataFrame: + def map_missing_values_by_columns( + self: object, df: pd.DataFrame, columns: List[str], missing_value: Any + ) -> pd.DataFrame: """ Replace 0-length values or values with keys in self.none_map with self.missing_value_generic. """ - return self._map_missing_values_by_columns(df = df, columns = columns, missing_value = missing_value) + return self._map_missing_values_by_columns( + df=df, columns=columns, missing_value=missing_value + ) # # Transforms - Rows @@ -510,15 +533,21 @@ def drop_rows( # Transforms - Aggregate Repeat Instruments by Index # - def _aggregate_repeat_instrument_column_by_index (self: object, df: pd.DataFrame, aggregator: str = "max", dtype: Callable = float, annotation: List[Dict[str, Any]] = []) -> pd.DataFrame: + def _aggregate_repeat_instrument_column_by_index( + self: object, + df: pd.DataFrame, + aggregator: str = "max", + dtype: Callable = float, + annotation: List[Dict[str, Any]] = [], + ) -> pd.DataFrame: new_columns = df["redcap_repeat_instrument"].unique() pivot = pd.pivot_table( df, - index = self.index_columns, - columns = ["redcap_repeat_instrument"], - values = "redcap_repeat_instance", - aggfunc = aggregator, - fill_value = self.missing_value_generic + index=self.index_columns, + columns=["redcap_repeat_instrument"], + values="redcap_repeat_instance", + aggfunc=aggregator, + fill_value=self.missing_value_generic, ) df = df.merge(pivot, how="inner", on=self.index_columns) df = df.drop_duplicates(self.index_columns, keep="first") @@ -527,7 +556,9 @@ def _aggregate_repeat_instrument_column_by_index (self: object, df: pd.DataFrame return df @classmethod - def aggregate_repeat_instrument_by_index (self: object, df: pd.DataFrame, aggregator: str = "max", dtype: Callable = float) -> pd.DataFrame: + def aggregate_repeat_instrument_by_index( + self: object, df: pd.DataFrame, aggregator: str = "max", dtype: Callable = float + ) -> pd.DataFrame: """ Pre-processing REDCap repeat_instrument so each instrument has its own column and the value. The value is computed