Skip to content

Commit

Permalink
Implement get_stale_profiles and delete_stale_profiles (#23)
Browse files Browse the repository at this point in the history
* Add list_stale.py

* Rename list_stale_profiles to get_stale_profiles

* Add tests for delete_stale_profiles and get_stale_profiles

* Resolve comments

* Don't redeploy profiles-controller if it already exists

* Fix linting

* Remove debugging message

* Fix info message
  • Loading branch information
mvlassis authored Jan 14, 2025
1 parent 5bc78e4 commit b92f601
Show file tree
Hide file tree
Showing 9 changed files with 233 additions and 3 deletions.
4 changes: 1 addition & 3 deletions src/profiles_management/create_or_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@
from typing import Dict

from charmed_kubeflow_chisme.lightkube.batch import delete_many
from lightkube import Client
from lightkube.generic_resource import GenericGlobalResource

from profiles_management.helpers.k8s import get_name
from profiles_management.helpers.k8s import client, get_name
from profiles_management.helpers.kfam import (
list_contributor_authorization_policies,
list_contributor_rolebindings,
Expand All @@ -25,7 +24,6 @@
from profiles_management.pmr.classes import ProfilesManagementRepresentation

log = logging.getLogger(__name__)
client = Client(field_manager="profiles-automator-lightkube")


def remove_access_to_stale_profile(profile: GenericGlobalResource):
Expand Down
27 changes: 27 additions & 0 deletions src/profiles_management/delete_stale.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Module responsible for deleting "stale" Profiles based on a PMR.
In this context, a "stale" Profile is a Profile that exists in the cluster but doesn't belong
in the PMR.
"""

import logging

from profiles_management.helpers import profiles
from profiles_management.helpers.k8s import client
from profiles_management.list_stale import list_stale_profiles
from profiles_management.pmr.classes import ProfilesManagementRepresentation

log = logging.getLogger(__name__)


def delete_stale_profiles(pmr: ProfilesManagementRepresentation):
    """Remove every Profile found in the cluster that the given PMR does not contain.

    Args:
        pmr: The ProfilesManagementRepresentation describing which Profiles and
            contributors are expected to exist in the cluster.
    """
    # Resolve the stale set first so the log line below is only emitted once
    # the cluster has been queried successfully.
    profiles_to_delete = list_stale_profiles(pmr).values()
    log.info("Deleting all stale Profiles.")
    for stale_profile in profiles_to_delete:
        profiles.remove_profile(stale_profile, client)
45 changes: 45 additions & 0 deletions src/profiles_management/helpers/k8s.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
"""Generic helpers for manipulating K8s objects, via lightkube."""

import logging

import tenacity
from lightkube import Client
from lightkube.core.exceptions import ApiError
from lightkube.generic_resource import GenericGlobalResource, GenericNamespacedResource
from lightkube.resources.core_v1 import Namespace

# Module-level logger for the K8s helpers.
log = logging.getLogger(__name__)
# Shared lightkube client, created once when this module is imported; other
# modules of the package import `client` from here instead of creating their own.
client = Client(field_manager="profiles-automator-lightkube")


class ObjectStillExistsError(Exception):
    """Raised when a K8s object is still present although it should have been removed.

    Used, for example, when a Namespace exists while it shouldn't.
    """


def get_name(res: GenericNamespacedResource | GenericGlobalResource) -> str:
Expand All @@ -22,3 +38,32 @@ def get_name(res: GenericNamespacedResource | GenericGlobalResource) -> str:
raise ValueError("Couldn't detect name, object has no name field: %s" % res)

return res.metadata.name


@tenacity.retry(stop=tenacity.stop_after_delay(300), wait=tenacity.wait_fixed(5), reraise=True)
def ensure_namespace_is_deleted(namespace: str, client: Client):
    """Check, with retries, that a namespace no longer exists.

    The lookup is attempted every 5 seconds for up to 300 seconds. A 404 from the
    API server means the namespace is gone and ends the wait successfully.

    Args:
        namespace: The name of the namespace that is expected to be deleted.
        client: The lightkube client to use for talking to K8s.

    Raises:
        ApiError: From lightkube, if there was an error aside from 404.
        ObjectStillExistsError: If the namespace was not deleted after retries.
    """
    log.info("Checking if namespace exists: %s", namespace)
    try:
        client.get(Namespace, name=namespace)
    except ApiError as e:
        if e.status.code == 404:
            log.info('Namespace "%s" doesn\'t exist!', namespace)
            return
        # Raise any other error.
        # NOTE(review): no `retry=` predicate is passed to tenacity, so presumably
        # this error is retried as well until the deadline -- confirm that is intended.
        raise

    # The GET succeeded, so the namespace is still there; raising makes tenacity
    # schedule another attempt (or surface this error once the deadline passes).
    log.info('Namespace "%s" exists, retrying...', namespace)
    # Interpolate the namespace name: the message previously carried a literal "%s".
    raise ObjectStillExistsError("Namespace %s is not deleted." % namespace)
27 changes: 27 additions & 0 deletions src/profiles_management/helpers/profiles.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
"""Utility module for manipulating Profiles."""

import logging
from typing import Iterator

from lightkube import Client
from lightkube.generic_resource import GenericGlobalResource, create_global_resource

from profiles_management.helpers import k8s

# Generic lightkube resource class for the cluster-scoped (global)
# kubeflow.org/v1 Profile custom resource.
Profile = create_global_resource(
    group="kubeflow.org", version="v1", kind="Profile", plural="profiles"
)

log = logging.getLogger(__name__)


def list_profiles(client: Client) -> Iterator[GenericGlobalResource]:
"""Return all Profile CRs in the cluster.
Expand All @@ -20,3 +25,25 @@ def list_profiles(client: Client) -> Iterator[GenericGlobalResource]:
Iterator of Profiles in the cluster.
"""
return client.list(Profile)


def remove_profile(profile: GenericGlobalResource, client: Client, wait_namespace=True):
    """Delete a Profile from the cluster, optionally waiting for its namespace to go away.

    Args:
        profile: The Profile lightkube resource to delete from the cluster.
        client: The lightkube client to use for talking to K8s.
        wait_namespace: Whether to block, with a timeout, until the namespace
            named after the Profile has been deleted.

    Raises:
        ApiError: From lightkube, if there was an error.
        ObjectStillExistsError: If the Profile's namespace was not deleted after retries.
    """
    profile_name = k8s.get_name(profile)
    log.info("Removing Profile: %s", profile_name)
    client.delete(Profile, profile_name)

    if not wait_namespace:
        return

    # The namespace being waited on carries the same name as the Profile.
    log.info("Waiting for created namespace to be deleted.")
    k8s.ensure_namespace_is_deleted(profile_name, client)
40 changes: 40 additions & 0 deletions src/profiles_management/list_stale.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Module responsible for listing "stale" Profiles based on a PMR.
In this context, a "stale" Profile is a Profile that exists in the cluster but doesn't belong
in the PMR.
"""

import logging

from lightkube.generic_resource import GenericGlobalResource

from profiles_management.helpers.k8s import client, get_name
from profiles_management.helpers.profiles import list_profiles
from profiles_management.pmr.classes import ProfilesManagementRepresentation

log = logging.getLogger(__name__)


def list_stale_profiles(pmr: ProfilesManagementRepresentation) -> dict[str, GenericGlobalResource]:
    """Find all profiles that exist in the cluster but do not belong in a given PMR.

    Args:
        pmr: The ProfilesManagementRepresentation expressing what Profiles and contributors
            should exist in the cluster.

    Returns:
        A mapping from Profile name to Profile resource, holding the Profiles that
        exist in the cluster but are not part of the given PMR.
    """
    log.info("Fetching all Profiles in the cluster")
    # Materialise the listing first so every Profile is fetched before the PMR
    # is consulted.
    existing_profiles: dict[str, GenericGlobalResource] = {
        get_name(profile): profile for profile in list_profiles(client)
    }

    stale_profiles: dict[str, GenericGlobalResource] = {}
    for profile_name, existing_profile in existing_profiles.items():
        if pmr.has_profile(profile_name):
            continue
        # Log through the module logger (not the root `logging` module) so the
        # record is attributed to this module like the other messages here.
        log.info(
            "Profile %s not in PMR. Adding it to the list of stale Profiles.", profile_name
        )
        stale_profiles[profile_name] = existing_profile
    return stale_profiles
4 changes: 4 additions & 0 deletions tests/integration/profiles_management/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ async def deploy_profiles_controller(ops_test: OpsTest):
if not ops_test.model:
pytest.fail("ops_test has a None model", pytrace=False)

if PROFILES_CHARM in ops_test.model.applications:
log.info("Profiles Controller charm already exists, no need to re-deploy.")
return

log.info("Deploying the Profiles Controller charm.")
await ops_test.model.deploy(PROFILES_CHARM, channel=PROFILES_CHANNEL, trust=PROFILES_TRUST)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import logging

import pytest
from lightkube import Client

from profiles_management.delete_stale import delete_stale_profiles
from profiles_management.helpers.profiles import list_profiles
from profiles_management.pmr import classes
from tests.integration.profiles_management.helpers import profiles

log = logging.getLogger(__name__)
client = Client(field_manager="profiles-automator-lightkube")

TESTS_YAMLS_PATH = "tests/integration/profiles_management/yamls"


@pytest.mark.asyncio
async def test_delete_stale_profiles(deploy_profiles_controller, lightkube_client: Client):
    """Apply a test Profile, delete stale Profiles for a no-argument PMR, expect none left."""
    await deploy_profiles_controller

    profile_path = f"{TESTS_YAMLS_PATH}/profile.yaml"
    context = {"namespace": "test"}

    # Render the Profile from its YAML file and apply it to the cluster.
    profile_contents = profiles.load_profile_from_file(profile_path, context)

    log.info("Creating Profile and waiting for Namespace to be created...")
    profiles.apply_profile(profile_contents, lightkube_client)

    # This PMR should not contain the test Profile applied above.
    pmr = classes.ProfilesManagementRepresentation()

    log.info(
        "Running delete_stale_profiles() which should delete all Profiles we created earlier."
    )
    delete_stale_profiles(pmr)

    # The listing must not yield a single Profile.
    assert not any(True for _ in list_profiles(client))
47 changes: 47 additions & 0 deletions tests/integration/profiles_management/test_list_stale_profiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import logging

import pytest
from lightkube import Client
from lightkube.generic_resource import GenericGlobalResource

from profiles_management.helpers.k8s import get_name
from profiles_management.helpers.profiles import list_profiles
from profiles_management.list_stale import list_stale_profiles
from profiles_management.pmr import classes
from tests.integration.profiles_management.helpers import profiles

log = logging.getLogger(__name__)
client = Client(field_manager="profiles-automator-lightkube")

TESTS_YAMLS_PATH = "tests/integration/profiles_management/yamls"


@pytest.mark.asyncio
async def test_list_stale_profiles(deploy_profiles_controller, lightkube_client: Client):
    """Check that list_stale_profiles() reports every cluster Profile for a no-argument PMR.

    A test Profile is applied, all Profiles in the cluster are collected by name,
    and the result of list_stale_profiles() is compared against that mapping.
    The test Profile is removed afterwards.
    """
    await deploy_profiles_controller

    namespace = "test"
    context = {"namespace": namespace}

    profile_path = TESTS_YAMLS_PATH + "/profile.yaml"

    # Load and apply all objects from files
    profile_contents = profiles.load_profile_from_file(profile_path, context)

    log.info("Creating Profile and waiting for Namespace to be created...")
    profile = profiles.apply_profile(profile_contents, lightkube_client)

    # Use a distinct iteration name: reusing `profile` here would rebind it to the
    # last listed Profile, and the cleanup below would then remove that one
    # instead of the Profile this test applied.
    existing_profiles: dict[str, GenericGlobalResource] = {
        get_name(existing): existing for existing in list_profiles(client)
    }

    # Create the PMR, which should not contain the above test profile
    pmr = classes.ProfilesManagementRepresentation()

    log.info("Running list_stale_profiles() which should return all Profiles we created earlier.")
    stale_profiles = list_stale_profiles(pmr)

    assert existing_profiles == stale_profiles

    log.info("Removing test Profile and resources in it.")
    profiles.remove_profile(profile, lightkube_client)
1 change: 1 addition & 0 deletions tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@


@pytest.mark.abort_on_fail
@pytest.mark.skip()
async def test_build_and_deploy(ops_test: OpsTest):
"""Build the github-profiles-automator charm and deploy it.
Expand Down

0 comments on commit b92f601

Please sign in to comment.