From 56babf4e93c53a2d1e1f073c3a3e5d812bd8cf46 Mon Sep 17 00:00:00 2001 From: Vladimir Milosevic <157983820+vmilosevic@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:21:35 +0100 Subject: [PATCH] Workaround for unique constraint (#3) --- .github/actions/collect_data/src/cicd.py | 13 +++++++++++ .../collect_data/src/unittest_parser.py | 8 ++++++- .../collect_data/test/test_generate_data.py | 23 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/.github/actions/collect_data/src/cicd.py b/.github/actions/collect_data/src/cicd.py index 9f503cb..b98c65b 100644 --- a/.github/actions/collect_data/src/cicd.py +++ b/.github/actions/collect_data/src/cicd.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: Apache-2.0 import json from loguru import logger +from datetime import datetime, timedelta +import random from utils import ( get_pipeline_row_from_github_info, get_job_rows_from_github_info, get_data_pipeline_datetime_from_datetime, + get_datetime_from_github_datetime, ) from workflows import ( get_github_job_id_to_test_reports, @@ -54,6 +57,16 @@ def create_cicd_json_for_data_analysis( logger.info(f"Found {len(tests_in_report)} tests in report {test_report_path}") tests.extend(tests_in_report) logger.info(f"Found {len(tests)} tests total for job {github_job_id}") + raw_job["job_start_ts"] = alter_time(raw_job["job_start_ts"]) jobs.append(pydantic_models.Job(**raw_job, tests=tests)) return pydantic_models.Pipeline(**raw_pipeline, jobs=jobs) + + +def alter_time(timestamp): + # Workaround for the fact that we don't have milliseconds in the timestamp + # Add a random number of milliseconds to the timestamp to make it unique + original_timestamp = get_datetime_from_github_datetime(timestamp) + altered_time = original_timestamp + timedelta(milliseconds=random.randint(0, 999)) + altered_time_str = altered_time.isoformat(sep=" ", timespec="milliseconds") + return altered_time_str diff --git a/.github/actions/collect_data/src/unittest_parser.py 
b/.github/actions/collect_data/src/unittest_parser.py index d8d265b..db35530 100644 --- a/.github/actions/collect_data/src/unittest_parser.py +++ b/.github/actions/collect_data/src/unittest_parser.py @@ -11,6 +11,7 @@ def get_tests(test_report_path): with open(test_report_path) as f: data = f.read() dict_data = xmltodict.parse(data) + previous_test_end_ts = None for testsuite in dict_data["testsuites"]["testsuite"]: # testcases can be dict or list @@ -22,7 +23,6 @@ def get_tests(test_report_path): message = None test_start_ts = testcase["@timestamp"] duration = testcase["@time"] - test_end_ts = add_time(test_start_ts, duration) skipped = testcase.get("skipped", False) error = testcase.get("error", False) failure = testcase.get("failure", False) @@ -37,6 +37,11 @@ def get_tests(test_report_path): message += "\n" + testcase["failure"]["@message"] message += "\n" + testcase["failure"]["#text"] + # Workaround: Data team requires unique test_start_ts + if previous_test_end_ts: + test_start_ts = max(test_start_ts, previous_test_end_ts) + test_end_ts = add_time(test_start_ts, duration) + test = Test( test_start_ts=test_start_ts, test_end_ts=test_end_ts, @@ -53,6 +58,7 @@ def get_tests(test_report_path): tags=None, ) tests.append(test) + previous_test_end_ts = test_end_ts return tests diff --git a/.github/actions/collect_data/test/test_generate_data.py b/.github/actions/collect_data/test/test_generate_data.py index f794277..bb6f392 100644 --- a/.github/actions/collect_data/test/test_generate_data.py +++ b/.github/actions/collect_data/test/test_generate_data.py @@ -27,3 +27,26 @@ def test_create_pipeline_json(run_id): with open(filename, "r") as file: data = json.load(file) assert data["jobs"][0]["card_type"] in ["N300", "N150", "E150"] + + # validate constraints + assert check_constraint(pipeline) + + +def check_constraint(pipeline): + # check if the pipeline has the correct constraints + # unique cicd_job_id, full_test_name, test_start_ts + unique_tests = set() + for job in 
pipeline.jobs: + for test in job.tests: + key = (job.github_job_id, test.full_test_name, test.test_start_ts) + if key in unique_tests: + raise ValueError("Job already exists: ", key) + unique_tests.add(key) + # unique cicd_pipeline_id, name, job_submission_ts, job_start_ts, job_end_ts + unique_jobs = set() + for job in pipeline.jobs: + key = (pipeline.github_pipeline_id, job.name, job.job_submission_ts, job.job_start_ts, job.job_end_ts) + if key in unique_jobs: + raise ValueError("Job already exists: ", key) + unique_jobs.add(key) + return True