From 56babf4e93c53a2d1e1f073c3a3e5d812bd8cf46 Mon Sep 17 00:00:00 2001 From: Vladimir Milosevic <157983820+vmilosevic@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:21:35 +0100 Subject: [PATCH] Workaround for unique constraint (#3) --- .github/actions/collect_data/src/cicd.py | 13 +++++++++++ .../collect_data/src/unittest_parser.py | 8 ++++++- .../collect_data/test/test_generate_data.py | 23 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/.github/actions/collect_data/src/cicd.py b/.github/actions/collect_data/src/cicd.py index 9f503cb..b98c65b 100644 --- a/.github/actions/collect_data/src/cicd.py +++ b/.github/actions/collect_data/src/cicd.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: Apache-2.0 import json from loguru import logger +from datetime import datetime, timedelta +import random from utils import ( get_pipeline_row_from_github_info, get_job_rows_from_github_info, get_data_pipeline_datetime_from_datetime, + get_datetime_from_github_datetime, ) from workflows import ( get_github_job_id_to_test_reports, @@ -54,6 +57,16 @@ def create_cicd_json_for_data_analysis( logger.info(f"Found {len(tests_in_report)} tests in report {test_report_path}") tests.extend(tests_in_report) logger.info(f"Found {len(tests)} tests total for job {github_job_id}") + raw_job["job_start_ts"] = alter_time(raw_job["job_start_ts"]) jobs.append(pydantic_models.Job(**raw_job, tests=tests)) return pydantic_models.Pipeline(**raw_pipeline, jobs=jobs) + + +def alter_time(timestamp): + # Workaround for the fact that we don't have milliseconds in the timestamp + # Add a random number of milliseconds to the timestamp to make it unique + original_timestamp = get_datetime_from_github_datetime(timestamp) + altered_time = original_timestamp + timedelta(milliseconds=random.randint(0, 999)) + altered_time_str = altered_time.isoformat(sep=" ", timespec="milliseconds") + return altered_time_str diff --git a/.github/actions/collect_data/src/unittest_parser.py 
b/.github/actions/collect_data/src/unittest_parser.py index d8d265b..db35530 100644 --- a/.github/actions/collect_data/src/unittest_parser.py +++ b/.github/actions/collect_data/src/unittest_parser.py @@ -11,6 +11,7 @@ def get_tests(test_report_path): with open(test_report_path) as f: data = f.read() dict_data = xmltodict.parse(data) + previous_test_end_ts = None for testsuite in dict_data["testsuites"]["testsuite"]: # testcases can be dict or list @@ -22,7 +23,6 @@ def get_tests(test_report_path): message = None test_start_ts = testcase["@timestamp"] duration = testcase["@time"] - test_end_ts = add_time(test_start_ts, duration) skipped = testcase.get("skipped", False) error = testcase.get("error", False) failure = testcase.get("failure", False) @@ -37,6 +37,11 @@ def get_tests(test_report_path): message += "\n" + testcase["failure"]["@message"] message += "\n" + testcase["failure"]["#text"] + # Workaround: Data team requires unique test_start_ts + if previous_test_end_ts: + test_start_ts = max(test_start_ts, previous_test_end_ts) + test_end_ts = add_time(test_start_ts, duration) + test = Test( test_start_ts=test_start_ts, test_end_ts=test_end_ts, @@ -53,6 +58,7 @@ def get_tests(test_report_path): tags=None, ) tests.append(test) + previous_test_end_ts = test_end_ts return tests diff --git a/.github/actions/collect_data/test/test_generate_data.py b/.github/actions/collect_data/test/test_generate_data.py index f794277..bb6f392 100644 --- a/.github/actions/collect_data/test/test_generate_data.py +++ b/.github/actions/collect_data/test/test_generate_data.py @@ -27,3 +27,26 @@ def test_create_pipeline_json(run_id): with open(filename, "r") as file: data = json.load(file) assert data["jobs"][0]["card_type"] in ["N300", "N150", "E150"] + + # validate constraints + assert check_constraint(pipeline) + + +def check_constraint(pipeline): + # check if the pipeline has the correct constraints + # unique cicd_job_id, full_test_name, test_start_ts + unique_tests = set() + for job in 
pipeline.jobs: + for test in job.tests: + key = (job.github_job_id, test.full_test_name, test.test_start_ts) + if key in unique_tests: + raise ValueError("Job already exists: ", key) + unique_tests.add(key) + # unique cicd_pipeline_id, name, job_submission_ts, job_start_ts, job_end_ts + unique_jobs = set() + for job in pipeline.jobs: + key = (pipeline.github_pipeline_id, job.name, job.job_submission_ts, job.job_start_ts, job.job_end_ts) + if key in unique_jobs: + raise ValueError("Job already exists: ", key) + unique_jobs.add(key) + return True