From 37f512cabc058fca8e0ea7732a986197570620cf Mon Sep 17 00:00:00 2001 From: joseph-sentry <136376984+joseph-sentry@users.noreply.github.com> Date: Mon, 9 Sep 2024 09:40:44 -0400 Subject: [PATCH] fix: deduplicate component measurements (#693) --- services/tests/test_timeseries.py | 6 ++++ services/timeseries.py | 53 ++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/services/tests/test_timeseries.py b/services/tests/test_timeseries.py index 4d806ba2c..30b8b7451 100644 --- a/services/tests/test_timeseries.py +++ b/services/tests/test_timeseries.py @@ -422,6 +422,12 @@ def test_commit_measurement_insert_components( "flag_regexes": [], "paths": [r"folder/*"], }, + { # testing duplicate component on purpose this was causing crashes + "component_id": "all_settings", + "name": "all settings", + "flag_regexes": [], + "paths": [r"folder/*"], + }, { "component_id": "path_not_found", "name": "no expected covarage", diff --git a/services/timeseries.py b/services/timeseries.py index bae4b95f3..b7af8a963 100644 --- a/services/timeseries.py +++ b/services/timeseries.py @@ -131,7 +131,7 @@ def save_commit_measurements( if MeasurementName.component_coverage.value in dataset_names: components = current_yaml.get_components() if components: - measurements = [] + measurements = dict() for component in components: if component.paths or component.flag_regexes: @@ -143,19 +143,50 @@ def save_commit_measurements( ) if filtered_report.totals.coverage is not None: - measurements.append( - dict( - name=MeasurementName.component_coverage.value, - owner_id=commit.repository.ownerid, - repo_id=commit.repoid, - branch=commit.branch, - commit_sha=commit.commitid, - timestamp=commit.timestamp, - measurable_id=f"{component.component_id}", - value=float(filtered_report.totals.coverage), + measurement_key = ( + MeasurementName.component_coverage.value, + commit.repository.ownerid, + commit.repoid, + f"{component.component_id}", + commit.commitid, + commit.timestamp, + ) + if ( + existing_measurement := measurements.get(measurement_key) + ) is not None: + log.warning( + "Duplicate measurement keys being added to measurements", + extra=dict( + repoid=commit.repoid, + commit_id=commit.id_, + commitid=commit.commitid, + measurement_key=measurement_key, + existing_value=existing_measurement.get("value"), + new_value=float(filtered_report.totals.coverage), + ), + ) + + measurements[ + ( + MeasurementName.component_coverage.value, + commit.repository.ownerid, + commit.repoid, + f"{component.component_id}", + commit.commitid, + commit.timestamp, ) + ] = dict( + name=MeasurementName.component_coverage.value, + owner_id=commit.repository.ownerid, + repo_id=commit.repoid, + branch=commit.branch, + commit_sha=commit.commitid, + timestamp=commit.timestamp, + measurable_id=f"{component.component_id}", + value=float(filtered_report.totals.coverage), ) + measurements = list(measurements.values()) if len(measurements) > 0: log.info( "Upserting component coverage measurements",