From f37a654c155417bd0219ca57aee0b356e108e221 Mon Sep 17 00:00:00 2001 From: Greenstick Date: Thu, 7 Nov 2024 15:43:01 -0800 Subject: [PATCH] feat: year 2 dashboard updates --- apis/dashboard.py | 4 +- apis/redcap.py | 83 ---- modules/etl/config/aireadi_config.py | 396 ++++++++++++++++-- modules/etl/transforms/module_transform.py | 2 +- .../etl/transforms/redcap_live_transform.py | 17 +- .../transforms/redcap_release_transform.py | 17 +- modules/etl/vtypes/vtype.py | 18 - 7 files changed, 372 insertions(+), 165 deletions(-) diff --git a/apis/dashboard.py b/apis/dashboard.py index 061abf5a..70c60d1b 100644 --- a/apis/dashboard.py +++ b/apis/dashboard.py @@ -387,7 +387,7 @@ def get(self, study_id: str, dashboard_id: str): if not is_granted("view", study): return "Access denied, you can not view this dashboard", 403 - # Retrieve Dashboard Redis Cache if Available + # # Retrieve Dashboard Redis Cache if Available # cached_redcap_project_dashboard = caching.cache.get( # f"$study_id#{study_id}$dashboard_id#{dashboard_id}" # ) @@ -657,7 +657,7 @@ def get(self, study_id: str): # Public Dashboard ID dashboard_id = redcap_project_dashboard["id"] - # Retrieve Dashboard Redis Cache if Available + # # Retrieve Dashboard Redis Cache if Available # cached_redcap_project_dashboard = caching.cache.get( # f"$study_id#{study_id}$dashboard_id#{dashboard_id}#public" # ) diff --git a/apis/redcap.py b/apis/redcap.py index 7c24d120..edb3d65e 100644 --- a/apis/redcap.py +++ b/apis/redcap.py @@ -247,86 +247,3 @@ def delete(self, study_id: str, redcap_id: str): model.db.session.commit() return 204 - -# @api.route("/study//redcap") -# class EditRedcapProjectAPI(Resource): -# @api.doc(parser=project_parser) -# @api.response(200, "Success") -# @api.response(400, "Validation Error") -# @api.marshal_with(redcap_project_api_view_model) -# def put(self, study_id: int): -# """Update REDCap project API link""" -# study = model.Study.query.get(study_id) -# if not is_granted("update_redcap", study): -# return "Access denied, you can not modify this redcap project", 403 -# # Schema validation -# data: Union[Any, dict] = request.json -# schema = { -# "type": "object", -# "additionalProperties": False, -# "required": [ -# "api_pid", -# "title", -# "api_url", -# "api_active", -# ], -# "properties": { -# "api_pid": {"type": "string", "minLength": 1, "maxLength": 12}, -# "title": {"type": "string", "minLength": 1}, -# "api_url": {"type": "string", "minLength": 1}, -# "api_active": {"type": "boolean"}, -# }, -# } -# try: -# validate(request.json, schema) -# except ValidationError as e: -# return e.message, 400 - -# if len(data["api_pid"]) < 1: -# return ( -# f"""redcap api_pid is required for redcap access: -# {data['api_pid']}""", -# 400, -# ) -# if len(data["title"]) < 1: -# return ( -# f"""redcap title is required for redcap access: -# {data['title']}""", -# 400, -# ) -# if len(data["api_url"]) < 1: -# return ( -# f"""redcap api_url is required for redcap access: -# {data['api_url']}""", -# 400, -# ) -# if not isinstance(data["api_active"], bool): -# return ( -# f"""redcap api_active is required for redcap access: -# {data['api_active']}""", -# 400, -# ) -# update_redcap_project_view = model.StudyRedcap.query.get( -# data["api_pid"] -# ) -# update_redcap_project_view.update(data) -# model.db.session.commit() -# update_redcap_project_view = update_redcap_project_view.to_dict() -# return update_redcap_project_view, 201 - - -# @api.route("/study//redcap") -# class DeleteRedcapProjectAPI(Resource): -# @api.doc(parser=project_parser) -# @api.response(200, "Success") -# @api.response(400, "Validation Error") -# @api.marshal_with(redcap_project_api_view_model) -# def delete(self, study_id: int): -# """Delete REDCap project API link""" -# study = model.Study.query.get(study_id) -# if not is_granted("delete_redcap", study): -# return "Access denied, you can not delete this redcap project", 403 -# api_pid = project_parser.parse_args()["api_pid"] -# model.StudyRedcap.query.filter_by(api_pid=api_pid).delete() -# model.db.session.commit() -# return 204 diff --git a/modules/etl/config/aireadi_config.py b/modules/etl/config/aireadi_config.py index 7ab320a5..419fecb1 100644 --- a/modules/etl/config/aireadi_config.py +++ b/modules/etl/config/aireadi_config.py @@ -49,7 +49,12 @@ "cmtrt_insln", "cmtrt_glcs", "cmtrt_lfst", - "dricmpdat", + "pacmpdat", +] + +phase_2_columns: List = [ + "race_db", + "export_group", ] computed_columns: List = [ @@ -119,7 +124,7 @@ "2": "Complete", "1": "Unverified", "0": "Incomplete", - "": "Value Unavailable", + "": missing_value_generic, } phenotypes_column_map: Dict[str, str] = { @@ -128,6 +133,21 @@ # "mh_a1c": "Elevated A1C", } +race_db_map: Dict[str, str] = { + "white": "White", + "black": "Black", + "hispanic": "Hispanic or Latino", + "asian": "Asian", + "unknown": "Unknown", + "": "Value Unavailable", + "Value Unavailable": "Value Unavailable", +} + +export_group_map: Dict[str, str] = { + "pilot": "Pilot", + "year2": "Year 2", +} + # sex_column_map: Dict[str, str] = { # "M": "Male", # "F": "Female", @@ -173,7 +193,7 @@ { "key": "participant-list", "filepath": "AI-READI/REDCap", - "filename": "Redcap_data_report_247884.csv", + "filename": "Redcap_data_report_307916.csv", "kwdargs": { "raw_or_label": "raw", "raw_or_label_headers": "raw", @@ -186,7 +206,7 @@ { "key": "participant-values", "filepath": "AI-READI/REDCap", - "filename": "Redcap_data_report_242544.csv", + "filename": "Redcap_data_report_307918.csv", "kwdargs": { "raw_or_label": "raw", "raw_or_label_headers": "raw", @@ -196,11 +216,13 @@ }, "transforms": [ ("remap_values_by_columns", {"columns": data_columns}), - ("map_missing_values_by_columns", {"columns": data_columns}), + ("remap_values_by_columns", {"columns": ["export_group"], "value_map": export_group_map}), + ("remap_values_by_columns", {"columns": ["race_db"], "value_map": race_db_map}), + ("map_missing_values_by_columns", {"columns": data_columns + phase_2_columns}), ( "transform_values_by_column", { - "column": "dricmpdat", + "column": "pacmpdat", "new_column_name": "visitweek", # ISO 8601 string format token for front-end: %V "transform": lambda x: datetime.strptime(x, "%Y-%m-%d").isocalendar().week, @@ -210,7 +232,7 @@ ( "transform_values_by_column", { - "column": "dricmpdat", + "column": "pacmpdat", "new_column_name": "visityear", # ISO 8601 string format token for front-end: %Y "transform": lambda x: datetime.strptime(x, "%Y-%m-%d").isocalendar().year, @@ -220,7 +242,7 @@ ( "transform_values_by_column", { - "column": "dricmpdat", + "column": "pacmpdat", "new_column_name": "visitdate", # ISO 8601 string format token for front-end: %Y "transform": lambda x: datetime.strptime(x, "%Y-%m-%d"), @@ -247,14 +269,14 @@ ), ( "keep_columns", - {"columns": index_columns + data_columns + computed_columns}, + {"columns": index_columns + data_columns + computed_columns + phase_2_columns}, ), ], }, { "key": "instrument-status", "filepath": "AI-READI/REDCap", - "filename": "Redcap_data_report_251954.csv", + "filename": "Redcap_data_report_307920.csv", "kwdargs": { "raw_or_label": "raw", "raw_or_label_headers": "raw", @@ -274,7 +296,7 @@ { "key": "repeat-instrument", "filepath": "AI-READI/REDCap", - "filename": "Redcap_data_report_259920.csv", + "filename": "Redcap_data_report_307922.csv", "kwdargs": { "raw_or_label": "raw", "raw_or_label_headers": "raw", @@ -325,7 +347,7 @@ { "key": "participant-list", "filepath": "AI-READI/REDCap", - "filename": "Redcap_data_report_247884.csv", + "filename": "Redcap_data_report_307916.csv", "kwdargs": { "raw_or_label": "raw", "raw_or_label_headers": "raw", @@ -338,7 +360,7 @@ { "key": "participant-values", "filepath": "AI-READI/REDCap", - "filename": "Redcap_data_report_242544.csv", + "filename": "Redcap_data_report_307918.csv", "kwdargs": { "raw_or_label": "raw", "raw_or_label_headers": "raw", @@ -348,11 +370,13 @@ }, "transforms": [ ("remap_values_by_columns", {"columns": data_columns}), - ("map_missing_values_by_columns", {"columns": data_columns}), + ("remap_values_by_columns", {"columns": ["export_group"], "value_map": export_group_map}), + ("remap_values_by_columns", {"columns": ["race_db"], "value_map": race_db_map}), + ("map_missing_values_by_columns", {"columns": data_columns + phase_2_columns}), ( "transform_values_by_column", { - "column": "dricmpdat", + "column": "pacmpdat", "new_column_name": "visitweek", # ISO 8601 string format token for front-end: %V "transform": lambda x: datetime.strptime(x, "%Y-%m-%d").isocalendar().week, @@ -362,7 +386,7 @@ ( "transform_values_by_column", { - "column": "dricmpdat", + "column": "pacmpdat", "new_column_name": "visityear", # ISO 8601 string format token for front-end: %Y "transform": lambda x: datetime.strptime(x, "%Y-%m-%d").isocalendar().year, @@ -372,7 +396,7 @@ ( "transform_values_by_column", { - "column": "dricmpdat", + "column": "pacmpdat", "new_column_name": "visitdate", # ISO 8601 string format token for front-end: %Y "transform": lambda x: datetime.strptime(x, "%Y-%m-%d"), @@ -399,14 +423,14 @@ ), ( "keep_columns", - {"columns": index_columns + data_columns + computed_columns}, + {"columns": index_columns + data_columns + computed_columns + phase_2_columns}, ), ], }, { "key": "instrument-status", "filepath": "AI-READI/REDCap", - "filename": "Redcap_data_report_251954.csv", + "filename": "Redcap_data_report_307920.csv", "kwdargs": { "raw_or_label": "raw", "raw_or_label_headers": "raw", @@ -426,7 +450,7 @@ { "key": "repeat-instrument", "filepath": "AI-READI/REDCap", - "filename": "Redcap_data_report_259920.csv", + "filename": "Redcap_data_report_307922.csv", "kwdargs": { "raw_or_label": "raw", "raw_or_label_headers": "raw", @@ -1466,6 +1490,103 @@ }, ) +# Phenotype Recruitment Counts by Phase +phenotypeRecruitmentByPhaseTransformConfig: Tuple[str, Dict[str, Any]] = ( + "simpleTransform", + { + "key": "phenotype-recruitment-by-phase", + "strict": True, + "transforms": [ + { + "name": "Phenotype Recruitment by Phase", + "vtype": "DoubleDiscreteTimeseries", + "methods": [ + { + "groups": ["export_group", "phenotypes", "visitdate"], + "value": "record_id", + "func": "count", + } + ], + "accessors": { + "filterby": { + "name": "Phase", + "field": "export_group", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Phenotype", + "field": "phenotypes", + "missing_value": missing_value_generic, + "astype": str, + }, + "x": { + "name": "Week of the Year", + "field": "visitdate", + "missing_value": missing_value_generic, + "astype": str, + }, + "y": { + "name": "Cumulative Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + ], + }, +) + +# Race Recruitment Counts by Phase +raceRecruitmentByPhaseTransformConfig: Tuple[str, Dict[str, Any]] = ( + "simpleTransform", + { + "key": "race-recruitment-by-phase", + "strict": True, + "transforms": [ + { + "name": "Race Recruitment by Phase", + "vtype": "DoubleDiscreteTimeseries", + "methods": [ + { + "groups": ["export_group", "race_db", "visitdate"], + "value": "record_id", + "func": "count", + } + ], + "accessors": { + "filterby": { + "name": "Phase", + "field": "export_group", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Race", + "field": "race_db", + "missing_value": missing_value_generic, + "astype": str, + }, + "x": { + "name": "Week of the Year", + "field": "visitdate", + "missing_value": missing_value_generic, + "astype": str, + }, + "y": { + "name": "Cumulative Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + ], + }, +) + + # Race Recruitment Counts raceRecruitmentTransformConfig: Tuple[str, Dict[str, Any]] = ( "simpleTransform", @@ -1478,7 +1599,7 @@ "vtype": "DoubleDiscreteTimeseries", "methods": [ { - "groups": ["race", "visitdate"], + "groups": ["race_db", "visitdate"], "value": "record_id", "func": "count", } @@ -1486,13 +1607,13 @@ "accessors": { "filterby": { "name": "Race", - "field": "race", + "field": "race_db", "missing_value": missing_value_generic, "astype": str, }, "group": { "name": "Race", - "field": "race", + "field": "race_db", "missing_value": missing_value_generic, "astype": str, }, @@ -1526,7 +1647,7 @@ "vtype": "DoubleDiscreteTimeseries", "methods": [ { - "groups": ["siteid", "race", "visitdate"], + "groups": ["siteid", "race_db", "visitdate"], "value": "record_id", "func": "count", } @@ -1540,7 +1661,7 @@ }, "group": { "name": "Race", - "field": "race", + "field": "race_db", "missing_value": missing_value_generic, "astype": str, }, @@ -1658,6 +1779,56 @@ }, ) +# Sex Counts by Phase +sexRecruitmentByPhaseTransformConfig: Tuple[str, Dict[str, Any]] = ( + "simpleTransform", + { + "key": "sex-recruitment-by-phase", + "strict": True, + "transforms": [ + { + "name": "Sex Recruitment by Phase", + "vtype": "DoubleDiscreteTimeseries", + "methods": [ + { + "groups": ["export_group", "scrsex", "visitdate"], + "value": "record_id", + "func": "count", + } + ], + "accessors": { + "filterby": { + "name": "Phase", + "field": "export_group", + "missing_value": missing_value_generic, + "astype": str, + }, + "group": { + "name": "Sex", + "field": "scrsex", + "missing_value": missing_value_generic, + "astype": str, + }, + "x": { + "name": "Week of the Year", + "field": "visitdate", + "missing_value": missing_value_generic, + "astype": str, + }, + "y": { + "name": "Cumulative Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + ], + }, +) + + + # Race & Sex Counts by Race raceSexBySiteTransformConfig: Tuple[str, Dict[str, Any]] = ( "simpleTransform", @@ -1670,7 +1841,7 @@ "vtype": "DoubleCategorical", "methods": [ { - "groups": ["scrsex", "race", "siteid"], + "groups": ["scrsex", "race_db", "siteid"], "value": "record_id", "func": "count", } @@ -1689,7 +1860,7 @@ }, "subgroup": { "name": "Race", - "field": "race", + "field": "race_db", "missing_value": missing_value_generic, "astype": str, }, @@ -1811,7 +1982,7 @@ "vtype": "DoubleCategorical", "methods": [ { - "groups": ["phenotypes", "race", "scrsex"], + "groups": ["phenotypes", "race_db", "scrsex"], "value": "record_id", "func": "count", } @@ -1830,7 +2001,54 @@ }, "subgroup": { "name": "Race", - "field": "race", + "field": "race_db", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + ], + }, +) + +# Phenotype & Race Counts by Phase +phenotypeRaceByPhaseTransformConfig: Tuple[str, Dict[str, Any]] = ( + "simpleTransform", + { + "key": "phenotype-race-by-phase", + "strict": True, + "transforms": [ + { + "name": "Phenotype & Race by Sex", + "vtype": "DoubleCategorical", + "methods": [ + { + "groups": ["phenotypes", "race_db", "export_group"], + "value": "record_id", + "func": "count", + } + ], + "accessors": { + "filterby": { + "name": "Phase", + "field": "export_group", + "missing_value": missing_value_generic, + }, + "group": { + "name": "Phenotype", + "field": "phenotypes", + "missing_value": missing_value_generic, + "astype": str, + }, + "subgroup": { + "name": "Race", + "field": "race_db", "missing_value": missing_value_generic, "astype": str, }, @@ -1858,7 +2076,7 @@ "vtype": "DoubleCategorical", "methods": [ { - "groups": ["phenotypes", "race", "scrsex"], + "groups": ["phenotypes", "race_db", "scrsex"], "value": "record_id", "func": "count", } @@ -1866,7 +2084,7 @@ "accessors": { "filterby": { "name": "Race", - "field": "race", + "field": "race_db", "missing_value": missing_value_generic, }, "group": { @@ -1905,7 +2123,7 @@ "vtype": "DoubleCategorical", "methods": [ { - "groups": ["phenotypes", "race", "scrsex"], + "groups": ["phenotypes", "race_db", "scrsex"], "value": "record_id", "func": "count", } @@ -1913,7 +2131,7 @@ "accessors": { "filterby": { "name": "Race", - "field": "race", + "field": "race_db", "missing_value": missing_value_generic, }, "group": { @@ -1952,7 +2170,7 @@ "vtype": "DoubleCategorical", "methods": [ { - "groups": ["phenotypes", "race", "scrsex"], + "groups": ["phenotypes", "race_db", "scrsex"], "value": "record_id", "func": "count", } @@ -1971,7 +2189,54 @@ }, "subgroup": { "name": "Race", - "field": "race", + "field": "race_db", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + ], + }, +) + +# Race & Sex Counts by Phase +raceSexByPhaseTransformConfig: Tuple[str, Dict[str, Any]] = ( + "simpleTransform", + { + "key": "race-sex-by-phase", + "strict": True, + "transforms": [ + { + "name": "Race & Sex by Phase", + "vtype": "DoubleCategorical", + "methods": [ + { + "groups": ["export_group", "race_db", "scrsex"], + "value": "record_id", + "func": "count", + } + ], + "accessors": { + "filterby": { + "name": "Phase", + "field": "export_group", + "missing_value": missing_value_generic, + }, + "group": { + "name": "Race", + "field": "race_db", + "missing_value": missing_value_generic, + "astype": str, + }, + "subgroup": { + "name": "Sex", + "field": "scrsex", "missing_value": missing_value_generic, "astype": str, }, @@ -1999,7 +2264,7 @@ "vtype": "DoubleCategorical", "methods": [ { - "groups": ["phenotypes", "race", "scrsex"], + "groups": ["phenotypes", "race_db", "scrsex"], "value": "record_id", "func": "count", } @@ -2012,7 +2277,7 @@ }, "group": { "name": "Race", - "field": "race", + "field": "race_db", "missing_value": missing_value_generic, "astype": str, }, @@ -2046,7 +2311,7 @@ "vtype": "DoubleCategorical", "methods": [ { - "groups": ["phenotypes", "race", "scrsex"], + "groups": ["phenotypes", "race_db", "scrsex"], "value": "record_id", "func": "count", } @@ -2059,7 +2324,54 @@ }, "group": { "name": "Race", - "field": "race", + "field": "race_db", + "missing_value": missing_value_generic, + "astype": str, + }, + "subgroup": { + "name": "Phenotype", + "field": "phenotypes", + "missing_value": missing_value_generic, + "astype": str, + }, + "value": { + "name": "Count (N)", + "field": "record_id", + "missing_value": missing_value_generic, + "astype": int, + }, + }, + }, + ], + }, +) + +# Race & Phenotype Counts by Phase +racePhenotypeByPhaseTransformConfig: Tuple[str, Dict[str, Any]] = ( + "simpleTransform", + { + "key": "race-phenotype-by-phase", + "strict": True, + "transforms": [ + { + "name": "Race & Phenotype by Phase", + "vtype": "DoubleCategorical", + "methods": [ + { + "groups": ["phenotypes", "race_db", "export_group"], + "value": "record_id", + "func": "count", + } + ], + "accessors": { + "filterby": { + "name": "Phase", + "field": "export_group", + "missing_value": missing_value_generic, + }, + "group": { + "name": "Race", + "field": "race_db", "missing_value": missing_value_generic, "astype": str, }, @@ -4345,17 +4657,23 @@ "phenotype-sex-by-site": phenotypeSexBySiteTransformConfig, "phenotype-site-by-sex": phenotypeSiteBySexTransformConfig, "phenotype-race-by-sex": phenotypeRaceBySexTransformConfig, + "phenotype-race-by-phase": phenotypeRaceByPhaseTransformConfig, "phenotype-sex-by-race": phenotypeSexByRaceTransformConfig, "race-phenotype-by-sex": racePhenotypeBySexTransformConfig, + "race-phenotype-by-phase": racePhenotypeByPhaseTransformConfig, + "race-sex-by-phase": raceSexByPhaseTransformConfig, "race-sex-by-phenotype": raceSexByPhenotypeTransformConfig, "sex-phenotype-by-race": sexPhenotypeByRaceTransformConfig, "sex-race-by-phenotype": sexRaceByPhenotypeTransformConfig, "phenotype-recruitment": phenotypeRecruitmentTransformConfig, "phenotype-recruitment-by-site": phenotypeRecruitmentBySiteTransformConfig, + "phenotype-recruitment-by-phase": phenotypeRecruitmentByPhaseTransformConfig, "race-recruitment": raceRecruitmentTransformConfig, "race-recruitment-by-site": raceRecruitmentBySiteTransformConfig, + "race-recruitment-by-phase": raceRecruitmentByPhaseTransformConfig, "sex-recruitment": sexRecruitmentTransformConfig, "sex-recruitment-by-site": sexRecruitmentBySiteTransformConfig, + "sex-recruitment-by-phase": sexRecruitmentByPhaseTransformConfig, "race-sex-by-site": raceSexBySiteTransformConfig, "current-medications-by-site": currentMedicationsBySiteTransformConfig, } diff --git a/modules/etl/transforms/module_transform.py b/modules/etl/transforms/module_transform.py index b568aa4b..9da71265 100644 --- a/modules/etl/transforms/module_transform.py +++ b/modules/etl/transforms/module_transform.py @@ -67,7 +67,7 @@ def __init__( # Normalize Transforms to List Type, Check Validity, and Warn on Missing Attributes for indexed_transform in enumerate(self.transforms): - self.valid = True if self._transformIsValid(indexed_transform) else False + self.valid: bool = self._transformIsValid(indexed_transform) if self.strict and not self.valid: raise ValueError( f"{self.key}:Missing properties in transforms argument, see log at {self.logging_config['filename']} for details" diff --git a/modules/etl/transforms/redcap_live_transform.py b/modules/etl/transforms/redcap_live_transform.py index 47afdc24..da7c1635 100644 --- a/modules/etl/transforms/redcap_live_transform.py +++ b/modules/etl/transforms/redcap_live_transform.py @@ -105,12 +105,6 @@ def __init__(self, config: dict) -> None: self.missing_value_generic, ] self.none_map = {key: self.missing_value_generic for key in self.none_values} - self.survey_instrument_map = { - "2": "Complete", - "1": "Unverified", - "0": "Incomplete", - "": self.missing_value_generic, - } self.logger.info(f"Initialized") @@ -235,7 +229,8 @@ def _merge_reports( ] if len(merge_steps) > 0: - for providing_report_key, merge_kwdargs in merge_steps: + for merge_step in merge_steps: + providing_report_key, merge_kwdargs = merge_step df_providing_report = self.reports[providing_report_key]["transformed"] df_receiving_report = df_receiving_report.merge( df_providing_report, **merge_kwdargs @@ -432,13 +427,14 @@ def _remap_values_by_columns( for subvalue in str(value).split(",") if len(subvalue) > 0 ] - df.loc[i, column] = self.multivalue_separator.join( + remapped_value = self.multivalue_separator.join( [ value_map[subvalue] for subvalue in subvalues if subvalue in value_map.keys() ] ) + df.loc[i, column] = remapped_value return df @@ -863,10 +859,9 @@ def export_transformed( # Export Merged Transforms def export_merged_transformed( - self, path: str = "", separator: str = "\t", filetype: str = ".tsv" + self, filepath: str = "transformed-merged_redcap-extract.tsv", separator: str = "\t" ) -> object: - filename = f"transformed-merged_redcap-extract{filetype}" - filepath = os.path.join(self.cwd, path, filename) + filepath = os.path.join(self.cwd, filepath) self.merged.to_csv( filepath, sep=separator, diff --git a/modules/etl/transforms/redcap_release_transform.py b/modules/etl/transforms/redcap_release_transform.py index 552e549d..9cabd1cb 100644 --- a/modules/etl/transforms/redcap_release_transform.py +++ b/modules/etl/transforms/redcap_release_transform.py @@ -106,12 +106,6 @@ def __init__(self, config: dict) -> None: self.missing_value_generic, ] self.none_map = {key: self.missing_value_generic for key in self.none_values} - self.survey_instrument_map = { - "2": "Complete", - "1": "Unverified", - "0": "Incomplete", - "": self.missing_value_generic, - } self.logger.info(f"Initialized") @@ -268,7 +262,8 @@ def _merge_reports( ] if len(merge_steps) > 0: - for providing_report_key, merge_kwdargs in merge_steps: + for merge_step in merge_steps: + providing_report_key, merge_kwdargs = merge_step df_providing_report = self.reports[providing_report_key]["transformed"] df_receiving_report = df_receiving_report.merge( df_providing_report, **merge_kwdargs @@ -465,13 +460,14 @@ def _remap_values_by_columns( for subvalue in str(value).split(",") if len(subvalue) > 0 ] - df.loc[i, column] = self.multivalue_separator.join( + remapped_value = self.multivalue_separator.join( [ value_map[subvalue] for subvalue in subvalues if subvalue in value_map.keys() ] ) + df.loc[i, column] = remapped_value return df @@ -897,10 +893,9 @@ def export_transformed( # Export Merged Transforms def export_merged_transformed( - self, path: str = "", separator: str = "\t", filetype: str = ".tsv" + self, filepath: str = "transformed-merged_redcap-extract.tsv", separator: str = "\t" ) -> object: - filename = f"transformed-merged_redcap-extract{filetype}" - filepath = os.path.join(self.cwd, path, filename) + filepath = os.path.join(self.cwd, filepath) self.merged.to_csv( filepath, sep=separator, diff --git a/modules/etl/vtypes/vtype.py b/modules/etl/vtypes/vtype.py index b565f829..7e1bb6b8 100644 --- a/modules/etl/vtypes/vtype.py +++ b/modules/etl/vtypes/vtype.py @@ -55,24 +55,6 @@ def __init__( def __str__(self): return f"{self.__dict__}" - # def isvalid( - # self, df: pd.DataFrame, accessorsList: List[Dict[str, Dict[str, str]]] - # ) -> bool: - # """ - # Extends the VType.isvalid method to operate on a list - # of pd.DataFrames and accessors. - # """ - # valid = True - # for accessors in accessorsList: - # if not super(Compound, self).isvalid(df, accessors): - # self.validation_errors.append( - # f"VType {self.name.title()} has invalid accessors. See additional details above." - # ) - # valid = False - # else: - # continue - # return valid - def isvalid( self, df: pd.DataFrame, accessorsList: List[Dict[str, Dict[str, str]]] ) -> bool: