Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add backfill one off scripts #734

Merged
merged 7 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions one_off_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Entry point for running the one-off backfill scripts.

Meant to be invoked directly (``python one_off_script.py``) rather than from
the running application, so Django is configured here before any module that
imports Django models is loaded.
"""

import os

import django

# setdefault keeps a DJANGO_SETTINGS_MODULE that is already set in the
# environment and only falls back to the scaffold settings otherwise.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
django.setup()

if __name__ == "__main__":
    # Imported here rather than at the top of the file because both modules
    # import Django models, which requires django.setup() to have run.
    from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups
    from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges

    # backfill_daily_test_rollups exposes ``backfill_test_rollups``; it has no
    # ``run_impl``, so importing that name failed with ImportError.
    backfill_test_rollups()
    backfill_test_flag_bridges()

Check warning on line 13 in one_off_script.py

View check run for this annotation

Codecov Notifications / codecov/patch

one_off_script.py#L12-L13

Added lines #L12 - L13 were not covered by tests
6 changes: 6 additions & 0 deletions one_off_scripts/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Package initialisation for ``one_off_scripts``: importing the package
# configures Django, so the scripts can be used outside the running
# application.
import os

import django

# setdefault leaves an already-set DJANGO_SETTINGS_MODULE untouched and only
# supplies the scaffold settings as a fallback.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
django.setup()
Comment on lines +5 to +6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason this is duplicated from the main script? Does this actually run twice when you import one of the child scripts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These scripts are meant to be run separately from the actual application running on a pod, so we'd invoke them directly using `python one_off_script.py`, and that means we have to set up Django separately.

201 changes: 201 additions & 0 deletions one_off_scripts/backfill_daily_test_rollups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
import logging
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import date, datetime, timedelta

from django.db import transaction as django_transaction
from shared.django_apps.core.models import Repository
from shared.django_apps.reports.models import DailyTestRollup, Flake, TestInstance
from test_results_parser import Outcome

# Emit INFO-level records; getLogger() without a name returns the root logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()


@dataclass
class RollupObj:
    """Accumulated results for one ``(test_id, branch)`` key.

    Field order matters: instances are built positionally.
    """

    # Outcome counters.
    pass_count: int
    fail_count: int
    skip_count: int
    flaky_fail_count: int

    # Running total of durations, and the duration of the most recently
    # processed instance.
    sum_duration_seconds: float
    last_duration_seconds: float

    # Timestamp of the most recently processed instance.
    latest_run: datetime

    # Commit ids collected for this key; a fresh set per instance by default.
    commits_where_fail: set[str] = field(default_factory=set)


def get_test_analytics_repos(start_repoid):
    """Return repositories with test analytics enabled, ordered by ``repoid``.

    When ``start_repoid`` is given, only repositories whose ``repoid`` is
    greater than or equal to it are returned.
    """
    enabled_repos = Repository.objects.filter(test_analytics_enabled=True)
    ordered_repos = enabled_repos.order_by("repoid")

    if start_repoid is None:
        return ordered_repos

    return ordered_repos.filter(repoid__gte=start_repoid)


def process_instance(
rollup_dict: dict[tuple[str, str], RollupObj],
flake_dict: dict[str, list[tuple[datetime, datetime | None]]],
instance: TestInstance,
):
pass_count = 0
fail_count = 0
skip_count = 0
flaky_fail_count = 0
duration_seconds = instance.duration_seconds
created_at = instance.created_at
commitid = instance.commitid

match instance.outcome:
case Outcome.Pass:
pass_count = 1

Check warning on line 57 in one_off_scripts/backfill_daily_test_rollups.py

View check run for this annotation

Codecov Notifications / codecov/patch

one_off_scripts/backfill_daily_test_rollups.py#L57

Added line #L57 was not covered by tests
case Outcome.Skip:
skip_count = 1

Check warning on line 59 in one_off_scripts/backfill_daily_test_rollups.py

View check run for this annotation

Codecov Notifications / codecov/patch

one_off_scripts/backfill_daily_test_rollups.py#L59

Added line #L59 was not covered by tests
case _:
fail_count = 1
if (flaky_range_list := flake_dict.get(instance.test_id)) is not None:
for range in flaky_range_list:
if range[0] <= instance.created_at and (
range[1] is None or instance.created_at < range[1]
):
flaky_fail_count += 1
break

if (entry := rollup_dict.get((instance.test_id, instance.branch))) is not None:
entry.pass_count += pass_count
entry.fail_count += fail_count
entry.skip_count += skip_count
entry.flaky_fail_count += flaky_fail_count
entry.sum_duration_seconds += duration_seconds
entry.last_duration_seconds = duration_seconds
entry.latest_run = created_at
if commitid:
entry.commits_where_fail.add(commitid)

Check warning on line 79 in one_off_scripts/backfill_daily_test_rollups.py

View check run for this annotation

Codecov Notifications / codecov/patch

one_off_scripts/backfill_daily_test_rollups.py#L71-L79

Added lines #L71 - L79 were not covered by tests
else:
rollup_dict[(instance.test_id, instance.branch)] = RollupObj(
pass_count,
fail_count,
skip_count,
flaky_fail_count,
duration_seconds,
duration_seconds,
created_at,
set(),
)
if commitid:
rollup_dict[(instance.test_id, instance.branch)].commits_where_fail.add(
commitid
)


def save_rollups(rollup_dict, repoid, date):
    """Persist one day's accumulated rollups as ``DailyTestRollup`` rows.

    Args:
        rollup_dict: Rollups keyed by ``(test_id, branch)``.
        repoid: Repository the rollups belong to.
        date: Day the rollups describe.

    Rows are inserted with ``bulk_create`` in batches of 1000.
    """
    rollups_to_create = []
    for (test_id, branch), obj in rollup_dict.items():
        # The average is taken over passes and failures only. A test that was
        # only skipped has no such runs, so avoid dividing by zero (which
        # would abort the backfill) and store 0.0 instead.
        run_count = obj.pass_count + obj.fail_count
        avg_duration_seconds = (
            obj.sum_duration_seconds / run_count if run_count else 0.0
        )

        rollups_to_create.append(
            DailyTestRollup(
                repoid=repoid,
                date=date,
                test_id=test_id,
                branch=branch,
                pass_count=obj.pass_count,
                fail_count=obj.fail_count,
                skip_count=obj.skip_count,
                flaky_fail_count=obj.flaky_fail_count,
                commits_where_fail=list(obj.commits_where_fail),
                latest_run=obj.latest_run,
                last_duration_seconds=obj.last_duration_seconds,
                avg_duration_seconds=avg_duration_seconds,
            )
        )

    DailyTestRollup.objects.bulk_create(rollups_to_create, batch_size=1000)


def backfill_test_rollups(
    start_repoid: int | None = None,
    start_date: str | None = None,  # default is 2024-07-16
    end_date: str | None = None,  # default is 2024-09-17
) -> dict[str, bool]:
    """Rebuild ``DailyTestRollup`` rows from stored test instances.

    For every repository with test analytics enabled (optionally starting at
    ``start_repoid``), the existing rollups in the date range are deleted and
    then recreated one day at a time from that day's ``TestInstance`` rows.

    Args:
        start_repoid: Only process repositories with ``repoid`` greater than
            or equal to this value. ``None`` processes all of them.
        start_date: First day to backfill, as an ISO date string.
        end_date: Last day to backfill (inclusive), as an ISO date string.

    Returns:
        ``{"successful": True}`` once every repository has been processed.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not a valid ISO date.
    """
    log.info(
        "Updating test instances",
        extra=dict(start_repoid=start_repoid, start_date=start_date, end_date=end_date),
    )
    test_analytics_repos = get_test_analytics_repos(start_repoid)

    chunk_size = 10000

    # The date bounds are the same for every repo, so parse them once. This
    # also rejects a malformed date before any existing rollups are deleted.
    first_date = date.fromisoformat(start_date) if start_date else date(2024, 7, 16)
    until_date = date.fromisoformat(end_date) if end_date else date(2024, 9, 17)

    log.info(
        "Starting backfill for repos",
        extra=dict(repos=[repo.repoid for repo in test_analytics_repos]),
    )

    for repo in test_analytics_repos:
        repoid = repo.repoid
        log.info("Starting backfill for repo", extra=dict(repoid=repoid))

        # Each repo walks the full date range again from the first day.
        curr_date = first_date

        # delete all existing rollups for this repo in the backfill date range
        DailyTestRollup.objects.filter(
            repoid=repoid, date__gte=curr_date, date__lte=until_date
        ).delete()
        django_transaction.commit()
        log.info("Deleted rollups for repo", extra=dict(repoid=repoid))

        # get flakes, grouped by test id as (start, end) ranges
        flake_dict: dict[str, list[tuple[datetime, datetime | None]]] = defaultdict(
            list
        )
        for flake in Flake.objects.filter(repository_id=repoid):
            flake_dict[flake.test_id].append((flake.start_date, flake.end_date))

        while curr_date <= until_date:
            log.info(
                "Starting backfill for repo on date",
                extra=dict(repoid=repoid, date=curr_date),
            )

            rollup_dict: dict[tuple[str, str], RollupObj] = {}

            # The queryset is read in OFFSET/LIMIT slices below. Ordering by
            # created_at alone is not a total order, so rows sharing a
            # timestamp could be repeated or skipped between slices; the
            # primary key makes the order deterministic.
            test_instances = TestInstance.objects.filter(
                repoid=repoid, created_at__date=curr_date
            ).order_by("created_at", "pk")

            num_test_instances = test_instances.count()
            if num_test_instances == 0:
                curr_date += timedelta(days=1)
                continue

            for offset in range(0, num_test_instances, chunk_size):
                for instance in test_instances[offset : offset + chunk_size]:
                    # Instances without a branch or commit are not rolled up.
                    if instance.branch is None or instance.commitid is None:
                        continue

                    process_instance(rollup_dict, flake_dict, instance)

            save_rollups(rollup_dict, repoid, curr_date)
            django_transaction.commit()
            log.info(
                "Committed repo for day",
                extra=dict(repoid=repoid, date=curr_date),
            )
            curr_date += timedelta(days=1)

        log.info("Finished backfill for repo", extra=dict(repoid=repoid))

    return {"successful": True}
54 changes: 54 additions & 0 deletions one_off_scripts/backfill_test_flag_bridges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import logging

from django.db import transaction as django_transaction
from shared.django_apps.core.models import Repository
from shared.django_apps.reports.models import (
RepositoryFlag,
Test,
TestFlagBridge,
TestInstance,
)

# Emit INFO-level records; getLogger() without a name returns the root logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()


def backfill_test_flag_bridges(repoid=None):
    """Recreate ``TestFlagBridge`` rows for repos with test analytics enabled.

    For each test in a repo, the existing bridges are deleted and new ones
    are created from the flag names on the upload of the first
    ``TestInstance`` found for that test. Tests without any instance end up
    with no bridges.

    Args:
        repoid: Restrict the backfill to this repository. ``None`` processes
            every repository with test analytics enabled.

    Raises:
        KeyError: If an upload carries a flag name that has no
            ``RepositoryFlag`` in the repo being processed.
    """
    log.info("Backfilling TestFlagBridge objects", extra=dict(repoid=repoid))
    repos = Repository.objects.filter(test_analytics_enabled=True)
    if repoid is not None:
        repos = repos.filter(repoid=repoid)

    for repo in repos:
        tests = Test.objects.filter(repository_id=repo.repoid)

        # Look flags up by name once per repo instead of once per bridge.
        flags = {
            flag.flag_name: flag
            for flag in RepositoryFlag.objects.filter(repository=repo)
        }

        bridges_to_create = []
        for test in tests:
            TestFlagBridge.objects.filter(test=test).delete()

            first_test_instance = (
                TestInstance.objects.filter(test_id=test.id)
                .select_related("upload")
                .first()
            )

            if first_test_instance is None:
                continue

            bridges_to_create.extend(
                TestFlagBridge(test=test, flag=flags[flag_name])
                for flag_name in first_test_instance.upload.flag_names
            )

        TestFlagBridge.objects.bulk_create(bridges_to_create, batch_size=1000)
        log.info(
            "Done creating flag bridges for repo",
            # Log the repo that was just processed; the ``repoid`` argument
            # is None whenever the backfill runs over all repos.
            extra=dict(repoid=repo.repoid, num_tests=len(tests)),
        )
        django_transaction.commit()
Loading
Loading