Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workaround for unique constraint #3

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/actions/collect_data/src/cicd.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
# SPDX-License-Identifier: Apache-2.0
import json
from loguru import logger
from datetime import datetime, timedelta
import random

from utils import (
get_pipeline_row_from_github_info,
get_job_rows_from_github_info,
get_data_pipeline_datetime_from_datetime,
get_datetime_from_github_datetime,
)
from workflows import (
get_github_job_id_to_test_reports,
Expand Down Expand Up @@ -54,6 +57,16 @@ def create_cicd_json_for_data_analysis(
logger.info(f"Found {len(tests_in_report)} tests in report {test_report_path}")
tests.extend(tests_in_report)
logger.info(f"Found {len(tests)} tests total for job {github_job_id}")
raw_job["job_start_ts"] = alter_time(raw_job["job_start_ts"])
jobs.append(pydantic_models.Job(**raw_job, tests=tests))

return pydantic_models.Pipeline(**raw_pipeline, jobs=jobs)


def alter_time(timestamp):
    """Return ``timestamp`` pushed forward by a random sub-second offset.

    Workaround for the fact that the incoming timestamp has no milliseconds:
    a random offset of 0-999 ms is added so that otherwise identical
    timestamps are likely to differ. NOTE(review): the offset is random, so
    two calls can still produce the same value; uniqueness is made probable,
    not guaranteed.

    Args:
        timestamp: value accepted by ``get_datetime_from_github_datetime``
            (presumably a GitHub-formatted datetime string - confirm against
            that helper).

    Returns:
        The shifted time formatted with ``isoformat`` using a space
        separator and millisecond precision.
    """
    parsed = get_datetime_from_github_datetime(timestamp)
    # Draw the jitter after parsing so a parse failure consumes no randomness.
    jitter_ms = random.randint(0, 999)
    shifted = parsed + timedelta(milliseconds=jitter_ms)
    return shifted.isoformat(sep=" ", timespec="milliseconds")
8 changes: 7 additions & 1 deletion .github/actions/collect_data/src/unittest_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def get_tests(test_report_path):
with open(test_report_path) as f:
data = f.read()
dict_data = xmltodict.parse(data)
previous_test_end_ts = None
for testsuite in dict_data["testsuites"]["testsuite"]:

# testcases can be dict or list
Expand All @@ -22,7 +23,6 @@ def get_tests(test_report_path):
message = None
test_start_ts = testcase["@timestamp"]
duration = testcase["@time"]
test_end_ts = add_time(test_start_ts, duration)
skipped = testcase.get("skipped", False)
error = testcase.get("error", False)
failure = testcase.get("failure", False)
Expand All @@ -37,6 +37,11 @@ def get_tests(test_report_path):
message += "\n" + testcase["failure"]["@message"]
message += "\n" + testcase["failure"]["#text"]

# Workaround: Data team requires unique test_start_ts
if previous_test_end_ts:
test_start_ts = max(test_start_ts, previous_test_end_ts)
test_end_ts = add_time(test_start_ts, duration)

test = Test(
test_start_ts=test_start_ts,
test_end_ts=test_end_ts,
Expand All @@ -53,6 +58,7 @@ def get_tests(test_report_path):
tags=None,
)
tests.append(test)
previous_test_end_ts = test_end_ts
return tests


Expand Down
23 changes: 23 additions & 0 deletions .github/actions/collect_data/test/test_generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,26 @@ def test_create_pipeline_json(run_id):
with open(filename, "r") as file:
data = json.load(file)
assert data["jobs"][0]["card_type"] in ["N300", "N150", "E150"]

# validate constraints
assert check_constraint(pipeline)


def check_constraint(pipeline):
    """Check that a pipeline satisfies the uniqueness constraints.

    Two constraints are verified, in this order:

    1. Across every test of every job, the key
       ``(job.github_job_id, test.full_test_name, test.test_start_ts)``
       is unique (constraint on cicd_job_id, full_test_name, test_start_ts).
    2. Across every job, the key ``(pipeline.github_pipeline_id, job.name,
       job.job_submission_ts, job.job_start_ts, job.job_end_ts)`` is unique
       (constraint on cicd_pipeline_id, name, job_submission_ts,
       job_start_ts, job_end_ts).

    Args:
        pipeline: object exposing ``github_pipeline_id`` and an iterable
            ``jobs``; each job exposes ``github_job_id``, ``name``,
            ``job_submission_ts``, ``job_start_ts``, ``job_end_ts`` and an
            iterable ``tests`` whose items expose ``full_test_name`` and
            ``test_start_ts``.

    Returns:
        ``True`` when no duplicate key is found.

    Raises:
        ValueError: on the first duplicate test key or duplicate job key.
    """
    # unique cicd_job_id, full_test_name, test_start_ts
    seen_tests = set()
    for job in pipeline.jobs:
        for test in job.tests:
            key = (job.github_job_id, test.full_test_name, test.test_start_ts)
            if key in seen_tests:
                # This is a test-level collision, so name it as such.
                raise ValueError(f"Test already exists: {key}")
            seen_tests.add(key)

    # unique cicd_pipeline_id, name, job_submission_ts, job_start_ts, job_end_ts
    seen_jobs = set()
    for job in pipeline.jobs:
        key = (
            pipeline.github_pipeline_id,
            job.name,
            job.job_submission_ts,
            job.job_start_ts,
            job.job_end_ts,
        )
        if key in seen_jobs:
            raise ValueError(f"Job already exists: {key}")
        seen_jobs.add(key)
    return True
Loading