-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add backfill one off scripts #734
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
9898109
feat: add backfill one off scripts
joseph-sentry 4730f1e
fix: address feedback and add a test for backfill daily test rollups
joseph-sentry 8f02930
test: write test for backfill_test_flag_bridges
joseph-sentry b584a44
fix: use first instead of an index access
joseph-sentry 66a56d1
update shared
joseph-sentry 4ca001c
fix: address feedback
joseph-sentry e00f450
test: cover case where test doesn't have any instances in backfill te…
joseph-sentry File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Entry point for running every one-off backfill in sequence as a standalone
# process (outside the normal application), so Django is configured here.
import os

import django

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
django.setup()

if __name__ == "__main__":
    # Imported only after django.setup(): both modules import Django models at
    # module level.
    # `backfill_daily_test_rollups` exposes `backfill_test_rollups`; there is no
    # `run_impl` in that module, so importing it would fail with ImportError.
    from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups
    from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges

    backfill_test_rollups()
    backfill_test_flag_bridges()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Django bootstrap for the one-off scripts.
#
# The scripts are invoked directly rather than from inside the running
# application, so Django has to be configured here before any model import.
# `setdefault` leaves an already-exported DJANGO_SETTINGS_MODULE untouched.
import os

import django

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
django.setup()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
import logging | ||
from collections import defaultdict | ||
from dataclasses import dataclass, field | ||
from datetime import date, datetime, timedelta | ||
|
||
from django.db import transaction as django_transaction | ||
from shared.django_apps.core.models import Repository | ||
from shared.django_apps.reports.models import DailyTestRollup, Flake, TestInstance | ||
from test_results_parser import Outcome | ||
|
||
logging.basicConfig(level=logging.INFO) | ||
log = logging.getLogger() | ||
|
||
|
||
@dataclass
class RollupObj:
    """In-memory accumulator for one (test_id, branch) pair on a single day.

    Instances are built positionally, so the field order below is part of the
    contract and must not be rearranged.
    """

    # Outcome tallies.
    pass_count: int
    fail_count: int
    skip_count: int
    flaky_fail_count: int

    # Running total of durations, and the duration of the most recently
    # processed instance.
    sum_duration_seconds: float
    last_duration_seconds: float

    # Creation time of the most recently processed instance.
    latest_run: datetime

    # Commit ids collected for this rollup; each object gets its own set.
    commits_where_fail: set[str] = field(default_factory=set)
|
||
|
||
def get_test_analytics_repos(start_repoid):
    """Return repositories with test analytics enabled, ordered by ``repoid``.

    When ``start_repoid`` is not ``None`` the result is limited to repositories
    whose ``repoid`` is greater than or equal to it. Combined with the
    ascending order this presumably lets an interrupted backfill be restarted
    from the repository it stopped at.
    """
    enabled_repos = Repository.objects.filter(test_analytics_enabled=True)

    if start_repoid is None:
        return enabled_repos.order_by("repoid")

    return enabled_repos.filter(repoid__gte=start_repoid).order_by("repoid")
|
||
|
||
def process_instance( | ||
rollup_dict: dict[tuple[str, str], RollupObj], | ||
flake_dict: dict[str, list[tuple[datetime, datetime | None]]], | ||
instance: TestInstance, | ||
): | ||
pass_count = 0 | ||
fail_count = 0 | ||
skip_count = 0 | ||
flaky_fail_count = 0 | ||
duration_seconds = instance.duration_seconds | ||
created_at = instance.created_at | ||
commitid = instance.commitid | ||
|
||
match instance.outcome: | ||
case Outcome.Pass: | ||
pass_count = 1 | ||
case Outcome.Skip: | ||
skip_count = 1 | ||
case _: | ||
fail_count = 1 | ||
if (flaky_range_list := flake_dict.get(instance.test_id)) is not None: | ||
for range in flaky_range_list: | ||
if range[0] <= instance.created_at and ( | ||
range[1] is None or instance.created_at < range[1] | ||
): | ||
flaky_fail_count += 1 | ||
break | ||
|
||
if (entry := rollup_dict.get((instance.test_id, instance.branch))) is not None: | ||
entry.pass_count += pass_count | ||
entry.fail_count += fail_count | ||
entry.skip_count += skip_count | ||
entry.flaky_fail_count += flaky_fail_count | ||
entry.sum_duration_seconds += duration_seconds | ||
entry.last_duration_seconds = duration_seconds | ||
entry.latest_run = created_at | ||
if commitid: | ||
entry.commits_where_fail.add(commitid) | ||
else: | ||
rollup_dict[(instance.test_id, instance.branch)] = RollupObj( | ||
pass_count, | ||
fail_count, | ||
skip_count, | ||
flaky_fail_count, | ||
duration_seconds, | ||
duration_seconds, | ||
created_at, | ||
set(), | ||
) | ||
if commitid: | ||
rollup_dict[(instance.test_id, instance.branch)].commits_where_fail.add( | ||
commitid | ||
) | ||
|
||
|
||
def save_rollups(rollup_dict, repoid, date):
    """Persist one day's accumulated rollups for a repository.

    Args:
        rollup_dict: mapping of ``(test_id, branch)`` to the ``RollupObj``
            accumulated for that pair.
        repoid: id of the repository the rollups belong to.
        date: the day the rollups describe. (The parameter shadows the
            ``datetime.date`` import inside this function; the name is kept
            for compatibility with existing callers.)

    One ``DailyTestRollup`` is built per entry and all of them are inserted
    with ``bulk_create`` in batches of 1000.
    """
    rollups_to_create = []
    for (test_id, branch), obj in rollup_dict.items():
        # The average is taken over executed runs (passes + failures). A test
        # that was only skipped has none, so fall back to 0.0 instead of
        # raising ZeroDivisionError and aborting the whole backfill.
        executed_runs = obj.pass_count + obj.fail_count
        avg_duration_seconds = (
            obj.sum_duration_seconds / executed_runs if executed_runs else 0.0
        )

        rollups_to_create.append(
            DailyTestRollup(
                repoid=repoid,
                date=date,
                test_id=test_id,
                branch=branch,
                pass_count=obj.pass_count,
                fail_count=obj.fail_count,
                skip_count=obj.skip_count,
                flaky_fail_count=obj.flaky_fail_count,
                commits_where_fail=list(obj.commits_where_fail),
                latest_run=obj.latest_run,
                last_duration_seconds=obj.last_duration_seconds,
                avg_duration_seconds=avg_duration_seconds,
            )
        )

    DailyTestRollup.objects.bulk_create(rollups_to_create, batch_size=1000)
|
||
|
||
def backfill_test_rollups(
    start_repoid: int | None = None,
    start_date: str | None = None,  # default is 2024-07-16
    end_date: str | None = None,  # default is 2024-09-17
) -> dict[str, bool]:
    """Rebuild ``DailyTestRollup`` rows from the stored test instances.

    For every repository returned by ``get_test_analytics_repos`` the existing
    rollups inside the date window are deleted, and then each day in the
    window is recomputed from that day's ``TestInstance`` rows.

    Args:
        start_repoid: when given, only repositories with ``repoid`` greater
            than or equal to this value are processed.
        start_date: first day of the window as an ISO date string
            (``YYYY-MM-DD``); defaults to 2024-07-16.
        end_date: last day of the window (inclusive) as an ISO date string;
            defaults to 2024-09-17.

    Returns:
        ``{"successful": True}`` once every repository has been processed.

    Raises:
        ValueError: if ``start_date`` or ``end_date`` is not a valid ISO date.
    """
    log.info(
        "Updating test instances",
        extra=dict(start_repoid=start_repoid, start_date=start_date, end_date=end_date),
    )

    # The window is identical for every repository, so parse it once. This
    # also rejects a malformed date before any row is deleted.
    first_date = date.fromisoformat(start_date) if start_date else date(2024, 7, 16)
    until_date = date.fromisoformat(end_date) if end_date else date(2024, 9, 17)
    # Negative when the window is inverted, which makes the day loop a no-op.
    num_days = (until_date - first_date).days + 1

    test_analytics_repos = get_test_analytics_repos(start_repoid)

    chunk_size = 10000

    log.info(
        "Starting backfill for repos",
        extra=dict(repos=[repo.repoid for repo in test_analytics_repos]),
    )

    for repo in test_analytics_repos:
        repoid = repo.repoid
        log.info("Starting backfill for repo", extra=dict(repoid=repoid))

        # Delete every existing rollup of this repo inside the window (both
        # ends inclusive) so the days below can be recreated from scratch.
        DailyTestRollup.objects.filter(
            repoid=repoid, date__gte=first_date, date__lte=until_date
        ).delete()
        django_transaction.commit()
        log.info("Deleted rollups for repo", extra=dict(repoid=repoid))

        # Flaky windows per test, loaded once per repository.
        flake_dict: dict[str, list[tuple[datetime, datetime | None]]] = defaultdict(
            list
        )
        for flake in Flake.objects.filter(repository_id=repoid):
            flake_dict[flake.test_id].append((flake.start_date, flake.end_date))

        for day_offset in range(num_days):
            curr_date = first_date + timedelta(days=day_offset)
            log.info(
                "Starting backfill for repo on date",
                extra=dict(repoid=repoid, date=curr_date),
            )

            rollup_dict: dict[tuple[str, str], RollupObj] = {}

            # Slicing below becomes OFFSET/LIMIT queries. created_at alone is
            # not unique, so the primary key is added as a tie-breaker;
            # otherwise rows sharing a timestamp could be skipped or returned
            # twice across chunks.
            test_instances = TestInstance.objects.filter(
                repoid=repoid, created_at__date=curr_date
            ).order_by("created_at", "pk")

            num_test_instances = test_instances.count()
            if num_test_instances == 0:
                continue

            for chunk_start in range(0, num_test_instances, chunk_size):
                for instance in test_instances[chunk_start : chunk_start + chunk_size]:
                    # Rollups are keyed by branch and track commits, so
                    # instances missing either are left out.
                    if instance.branch is None or instance.commitid is None:
                        continue

                    process_instance(rollup_dict, flake_dict, instance)

            save_rollups(rollup_dict, repoid, curr_date)
            django_transaction.commit()
            log.info(
                "Committed repo for day",
                extra=dict(repoid=repoid, date=curr_date),
            )

        log.info("Finished backfill for repo", extra=dict(repoid=repoid))

    return {"successful": True}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import logging | ||
|
||
from django.db import transaction as django_transaction | ||
from shared.django_apps.core.models import Repository | ||
from shared.django_apps.reports.models import ( | ||
RepositoryFlag, | ||
Test, | ||
TestFlagBridge, | ||
TestInstance, | ||
) | ||
|
||
logging.basicConfig(level=logging.INFO) | ||
log = logging.getLogger() | ||
|
||
|
||
def backfill_test_flag_bridges(repoid=None):
    """Recreate ``TestFlagBridge`` rows for repositories with test analytics.

    Args:
        repoid: when given, only the repository with this id is processed;
            otherwise every repository with ``test_analytics_enabled`` is.

    For each test of a repository the existing bridges are deleted and new
    ones are built from the flag names of the upload attached to the first
    ``TestInstance`` found for that test. Tests without any instance end up
    with no bridges.

    Raises:
        KeyError: if an upload names a flag that has no ``RepositoryFlag`` in
            the repository.
    """
    log.info("Backfilling TestFlagBridge objects", extra=dict(repoid=repoid))
    repos = Repository.objects.filter(test_analytics_enabled=True)
    if repoid is not None:
        repos = repos.filter(repoid=repoid)

    for repo in repos:
        tests = Test.objects.filter(repository_id=repo.repoid)

        # Look flags up by name once per repository instead of once per test.
        flags = {
            flag.flag_name: flag
            for flag in RepositoryFlag.objects.filter(repository=repo)
        }

        bridges_to_create = []
        for test in tests:
            TestFlagBridge.objects.filter(test=test).delete()

            first_test_instance = (
                TestInstance.objects.filter(test_id=test.id)
                .select_related("upload")
                .first()
            )

            if first_test_instance is None:
                continue

            bridges_to_create.extend(
                TestFlagBridge(test=test, flag=flags[flag_name])
                for flag_name in first_test_instance.upload.flag_names
            )

        TestFlagBridge.objects.bulk_create(bridges_to_create, batch_size=1000)
        log.info(
            "Done creating flag bridges for repo",
            # Report the repository just processed; the `repoid` argument is
            # None whenever all repositories are being backfilled.
            extra=dict(repoid=repo.repoid, num_tests=len(tests)),
        )
        django_transaction.commit()
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any reason this is duplicated from the `main` script? Does this actually run twice when you import one of the child scripts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These scripts are meant to be run separately from the actual application running on a pod, so we'd invoke them directly using `python one_off_script.py`, which means we have to set up Django separately.