Skip to content

Commit

Permalink
Merge branch 'main' into may_01_rle
Browse files Browse the repository at this point in the history
  • Loading branch information
JerrySentry authored May 3, 2024
2 parents f23fef3 + 3aec683 commit f0bf20b
Show file tree
Hide file tree
Showing 40 changed files with 2,456 additions and 751 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
24.4.1
24.5.1
7 changes: 4 additions & 3 deletions celery_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,10 @@ def init_celery_tracing(*args, **kwargs):
hourly_check_task_name = "app.cron.hourly_check.HourlyCheckTask"
daily_plan_manager_task_name = "app.cron.daily.PlanManagerTask"

backfill_gh_app_installations_name = (
"app.tasks.backfill_gh_app_installations.BackfillGHAppInstallationsTask"
)
# Backfill GH Apps
backfill_existing_gh_app_installations_name = "app.tasks.backfill_existing_gh_app_installations.BackfillExistingGHAppInstallationsTask"
backfill_owners_without_gh_app_installations_name = "app.tasks.backfill_owners_without_gh_app_installations.BackfillOwnersWithoutGHAppInstallationsTask"

trial_expiration_task_name = "app.tasks.plan.TrialExpirationTask"
trial_expiration_cron_task_name = "app.cron.plan.TrialExpirationCronTask"

Expand Down
2 changes: 1 addition & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def mock_feature(mocker, request):

from shared.rollouts import Feature

def check_value(self, *, owner_id=None, repo_id=None, default=False):
def check_value(self, identifier, default=False):
return default

return mocker.patch.object(Feature, "check_value", check_value)
4 changes: 4 additions & 0 deletions database/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,7 @@ class FlakeSymptomType(Enum):
FAILED_IN_DEFAULT_BRANCH = "failed_in_default_branch"
CONSECUTIVE_DIFF_OUTCOMES = "consecutive_diff_outcomes"
UNRELATED_MATCHING_FAILURES = "unrelated_matching_failures"


# Errors that can occur while processing test results.
class TestResultsProcessingError(Enum):
    # NOTE(review): presumably "no successful test-result run/upload was found"
    # — confirm against the code that writes this value.
    NO_SUCCESS = "no_success"
1 change: 1 addition & 0 deletions database/models/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,4 @@ class TestResultReportTotals(CodecovBaseModel, MixinBaseClass):
passed = Column(types.Integer)
skipped = Column(types.Integer)
failed = Column(types.Integer)
error = Column(types.String(100), nullable=True)
67 changes: 67 additions & 0 deletions helpers/backfills.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import logging

import shared.torngit as torngit
from asgiref.sync import async_to_sync
from sqlalchemy.orm.session import Session

from database.models.core import GithubAppInstallation, Repository

log = logging.getLogger(__name__)

# GH App Backfills
# Looping and adding all repositories in the installation app
def add_repos_service_ids_from_provider(
    db_session: Session,
    ownerid: int,
    owner_service: torngit.base.TorngitBaseAdapter,
    gh_app_installation: GithubAppInstallation,
):
    """Record, on the installation, the service ids of the provider's repos
    that we already track in the DB for this owner.

    Lists the installation's repos from the provider, intersects their
    service ids with the owner's repos in our database, stores the result in
    ``gh_app_installation.repository_service_ids`` and commits.
    Does nothing when the provider returns no repos.
    """
    repos = async_to_sync(owner_service.list_repos_using_installation)()

    if repos:
        # Service ids of every repo we already have for that owner in the DB.
        # A set makes each membership test below O(1) instead of O(n).
        repo_service_ids_in_db = {
            repo.service_id
            for repo in db_session.query(Repository.service_id)
            .filter_by(ownerid=ownerid)
            .all()
        }

        # Keep only provider repos that we have DB records for
        # (falsy service ids are skipped).
        new_repo_service_ids = [
            service_id
            for service_id in (repo["repo"]["service_id"] for repo in repos)
            if service_id and service_id in repo_service_ids_in_db
        ]
        log.info(
            "Added the following repo service ids to this gh app installation",
            extra=dict(
                ownerid=ownerid,
                installation_id=gh_app_installation.installation_id,
                new_repo_service_ids=new_repo_service_ids,
            ),
        )
        gh_app_installation.repository_service_ids = new_repo_service_ids
        db_session.commit()


# Check if gh selection is set to all and act accordingly
def maybe_set_installation_to_all_repos(
    db_session: Session,
    owner_service,
    gh_app_installation: GithubAppInstallation,
):
    """Sync the installation's repo coverage when GitHub says it covers all repos.

    Fetches the installation from the provider; if its ``repository_selection``
    is ``"all"``, clears ``repository_service_ids`` (null means "covers every
    repo"), commits, and returns True. Otherwise returns False.
    """
    remote_installation = async_to_sync(owner_service.get_gh_app_installation)(
        installation_id=gh_app_installation.installation_id
    )
    selection = remote_installation.get("repository_selection", "")
    if selection != "all":
        return False
    # Null repo list == installation covers all of the owner's repos.
    gh_app_installation.repository_service_ids = None
    db_session.commit()
    log.info(
        "Selection is set to all, no installation is needed",
        extra=dict(ownerid=gh_app_installation.ownerid),
    )
    return True
7 changes: 6 additions & 1 deletion helpers/checkpoint_logger/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
"NOTIF_ERROR_NO_REPORT",
)
@success_events(
"SKIPPING_NOTIFICATION", "NOTIFIED", "NO_PENDING_JOBS", "NOTIF_STALE_HEAD"
"SKIPPING_NOTIFICATION",
"NOTIFIED",
"NO_PENDING_JOBS",
"NOTIF_STALE_HEAD",
"NO_REPORTS_FOUND",
)
@subflows(
("time_before_processing", "UPLOAD_TASK_BEGIN", "PROCESSING_BEGIN"),
Expand All @@ -36,6 +40,7 @@
class UploadFlow(BaseFlow):
UPLOAD_TASK_BEGIN = auto()
NO_PENDING_JOBS = auto()
NO_REPORTS_FOUND = auto()
TOO_MANY_RETRIES = auto()
PROCESSING_BEGIN = auto()
INITIAL_PROCESSING_COMPLETE = auto()
Expand Down
2 changes: 2 additions & 0 deletions helpers/sentry.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,5 @@ def initialize_sentry() -> None:
],
release=os.getenv("SENTRY_RELEASE", version_str),
)
if os.getenv("CLUSTER_ENV"):
sentry_sdk.set_tag("cluster", os.getenv("CLUSTER_ENV"))
25 changes: 17 additions & 8 deletions helpers/telemetry.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import asyncio
import logging
from datetime import datetime

import django
from asgiref.sync import sync_to_async
from psycopg2 import OperationalError
from shared.django_apps.pg_telemetry.models import SimpleMetric as PgSimpleMetric
from shared.django_apps.ts_telemetry.models import SimpleMetric as TsSimpleMetric

Expand All @@ -11,6 +13,8 @@

from .timeseries import timeseries_enabled

log = logging.getLogger(__name__)


def fire_and_forget(fn):
"""
Expand Down Expand Up @@ -131,14 +135,19 @@ def log_simple_metric(self, name: str, value: float):
)

if timeseries_enabled():
TsSimpleMetric.objects.create(
timestamp=timestamp,
name=name,
value=value,
repo_slug=self.repo_slug,
owner_slug=self.owner_slug,
commit_slug=self.commit_slug,
)
try:
TsSimpleMetric.objects.create(
timestamp=timestamp,
name=name,
value=value,
repo_slug=self.repo_slug,
owner_slug=self.owner_slug,
commit_slug=self.commit_slug,
)
except OperationalError:
log.warning(
"Failed to create TsSimpleMetric object, Timescale may be unavailable. However we will continue the current task."
)

@fire_and_forget
async def attempt_log_simple_metric(self, name: str, value: float):
Expand Down
8 changes: 7 additions & 1 deletion helpers/tests/unit/test_sentry.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@
class TestSentry(object):
def test_initialize_sentry(self, mocker, mock_configuration):
mock_configuration._params["services"] = {"sentry": {"server_dsn": "this_dsn"}}
mocker.patch.dict(os.environ, {"RELEASE_VERSION": "FAKE_VERSION_FOR_YOU"})
cluster = "test_env"
mocker.patch.dict(
os.environ,
{"RELEASE_VERSION": "FAKE_VERSION_FOR_YOU", "CLUSTER_ENV": cluster},
)
mocked_init = mocker.patch("helpers.sentry.sentry_sdk.init")
mocked_set_tag = mocker.patch("helpers.sentry.sentry_sdk.set_tag")
assert initialize_sentry() is None
mocked_init.assert_called_with(
"this_dsn",
Expand All @@ -18,3 +23,4 @@ def test_initialize_sentry(self, mocker, mock_configuration):
environment="production",
integrations=[mocker.ANY, mocker.ANY, mocker.ANY, mocker.ANY],
)
mocked_set_tag.assert_called_with("cluster", cluster)
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
https://github.com/codecov/shared/archive/a0b5d256b6d580a99dd3a1e900f5c9bdaf2d5c9e.tar.gz#egg=shared
https://github.com/codecov/shared/archive/039efcfca9c11a5d6d02e1e4d19b86d022a21a72.tar.gz#egg=shared
https://github.com/codecov/opentelem-python/archive/refs/tags/v0.0.4a1.tar.gz#egg=codecovopentelem
https://github.com/codecov/test-results-parser/archive/5515e960d5d38881036e9127f86320efca649f13.tar.gz#egg=test-results-parser
boto3>=1.34
Expand Down
18 changes: 3 additions & 15 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# This file is autogenerated by pip-compile with Python 3.9
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile requirements.in
Expand All @@ -18,8 +18,6 @@ anyio==3.6.1
# openai
asgiref==3.7.2
# via django
async-timeout==4.0.3
# via redis
backoff==1.6.0
# via analytics-python
billiard==4.2.0
Expand Down Expand Up @@ -119,8 +117,6 @@ ecdsa==0.18.0
# via tlslite-ng
excel-base==1.0.4
# via django-excel-response2
exceptiongroup==1.2.1
# via pytest
factory-boy==3.2.0
# via -r requirements.in
faker==8.8.2
Expand Down Expand Up @@ -361,9 +357,7 @@ requests==2.31.0
respx==0.20.2
# via -r requirements.in
rfc3986[idna2008]==1.4.0
# via
# httpx
# rfc3986
# via httpx
rsa==4.7.2
# via google-auth
s3transfer==0.10.1
Expand All @@ -374,7 +368,7 @@ sentry-sdk==1.40.0
# via
# -r requirements.in
# shared
shared @ https://github.com/codecov/shared/archive/a0b5d256b6d580a99dd3a1e900f5c9bdaf2d5c9e.tar.gz
shared @ https://github.com/codecov/shared/archive/039efcfca9c11a5d6d02e1e4d19b86d022a21a72.tar.gz
# via -r requirements.in
six==1.16.0
# via
Expand Down Expand Up @@ -418,18 +412,12 @@ timestring @ https://github.com/codecov/timestring/archive/d37ceacc5954dff3b5bd2
# via -r requirements.in
tlslite-ng==0.8.0b1
# via shared
tomli==2.0.1
# via
# coverage
# pytest
tqdm==4.66.1
# via openai
typing==3.7.4.3
# via shared
typing-extensions==4.6.3
# via
# asgiref
# kombu
# openai
# opentelemetry-sdk
# pydantic
Expand Down
97 changes: 62 additions & 35 deletions services/bots.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,60 +38,87 @@ def _get_installation_weight(installation: GithubAppInstallation) -> int:
return MAX_GITHUB_APP_SELECTION_WEIGHT
seconds_in_hour = 3600
age_hours = (age.seconds // seconds_in_hour) + age.days * 24
return age_hours + 2**age.days
# Prevent clock differences from making the weight negative
return max(1, age_hours + 2**age.days)


def _can_use_this_app(
app: GithubAppInstallation, installation_name: str, repository: Optional[Repository]
) -> bool:
return (
app.name == installation_name
# We ignore apps that are not configured because those can't be used
and app.is_configured()
and (
# If there is a repo we want only the apps that cover said repo
(repository and app.is_repo_covered_by_integration(repository))
# If there is no repo we still need some true value
or (not repository)
)
)


def _get_apps_from_weighted_selection(
owner: Owner, installation_name: str, repository: Optional[Repository]
) -> List[GithubAppInstallation]:
ghapp_installations_filter: List[GithubAppInstallation] = list(
filter(
lambda obj: (
obj.name == installation_name
and obj.is_configured()
and (
# If there is a repo we want only the apps that cover said repo
(repository and obj.is_repo_covered_by_integration(repository))
# If there is no repo we still need some true value
or (not repository)
)
),
"""This function returns an ordered list of GithubAppInstallations that can be used to communicate with GitHub
on behalf of the owner. The list is ordered in such a way that the 1st element is the app to be used in Torngit,
and the subsequent apps are selected as fallbacks.
IF the repository is provided, the selected apps also cover the repo.
IF installation_name is not the default one, then the default codecov installation
is also selected as a possible fallback app.
Apps are selected randomly but assigned weights based on how recently they were created.
This means that older apps are selected more frequently as the main app than newer ones.
(up to 10 days, when the probability of being chosen is the same)
The random selection is done so we can distribute request load more evenly among apps.
"""
# Map GithubAppInstallation.id --> GithubAppInstallation
ghapp_installations_filter: Dict[int, GithubAppInstallation] = {
obj.id: obj
for obj in filter(
lambda obj: _can_use_this_app(obj, installation_name, repository),
owner.github_app_installations or [],
)
)
}
# We assign weights to the apps based on how long ago they were created.
# The idea is that there's a greater chance that a change misconfigured the app,
# So apps recently created are selected less frequently than older apps
keys = list(ghapp_installations_filter.keys())
weights = [
min(MAX_GITHUB_APP_SELECTION_WEIGHT, _get_installation_weight(obj))
for obj in ghapp_installations_filter
min(
MAX_GITHUB_APP_SELECTION_WEIGHT,
_get_installation_weight(ghapp_installations_filter[key]),
)
for key in keys
]
# Random selection of size 3.
# If all apps have roughly the same probability of being selected, the array would have different entries.
# If 1 app dominates the probability of selection than it would probably be that app repeated 3 times, BUT
# from time to time the less frequent one would be selected.
apps_to_consider = (
random.choices(ghapp_installations_filter, weights=weights, k=3)
if len(ghapp_installations_filter) > 0
else []
)
# We pick apps one by one until all apps have been selected
# Obviously apps with a higher weight have a higher chance of being selected as the main app (1st selection)
# But it's important that others are also selected so we can use them as fallbacks
apps_to_consider = []
apps_to_select = len(keys)
selections = 0
while selections < apps_to_select:
selected_app_id = random.choices(keys, weights, k=1)[0]
apps_to_consider.append(ghapp_installations_filter[selected_app_id])
# random.choices chooses with replacement
# which we are trying to avoid here. So we remove the key selected and its weight from the population.
key_idx = keys.index(selected_app_id)
keys.pop(key_idx)
weights.pop(key_idx)
selections += 1
if installation_name != GITHUB_APP_INSTALLATION_DEFAULT_NAME:
# Add the default app as the last fallback if the owner is using a different app for the task
default_apps = filter(
lambda obj: obj.name == GITHUB_APP_INSTALLATION_DEFAULT_NAME,
lambda obj: _can_use_this_app(
obj, GITHUB_APP_INSTALLATION_DEFAULT_NAME, repository
),
owner.github_app_installations,
)
if default_apps:
apps_to_consider.extend(default_apps)
# Now we de-duplicate the apps_to_consider list before returning
seen_ids = dict()
list_to_return = []
for app in apps_to_consider:
if seen_ids.get(app.id, False):
continue
seen_ids[app.id] = True
list_to_return.append(app)
return list_to_return
return apps_to_consider


def get_owner_installation_id(
Expand Down
Loading

0 comments on commit f0bf20b

Please sign in to comment.