diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 189ef000b..64bf224b2 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,10 +3,11 @@ "dockerComposeFile": "docker-compose.yml", "service": "data_service", "containerUser": "root", + "remoteUser": "root", "workspaceFolder": "/workspace", "shutdownAction": "stopCompose", "features": { - "ghcr.io/devcontainers-contrib/features/poetry:2": {}, + "ghcr.io/devcontainers-extra/features/poetry:2": {}, "ghcr.io/devcontainers-contrib/features/bash-command:1": { "command": "poetry self add poetry-polylith-plugin" }, @@ -31,7 +32,7 @@ "./k3d": {} }, "overrideFeatureInstallOrder": [ - "ghcr.io/devcontainers-contrib/features/poetry", + "ghcr.io/devcontainers-extra/features/poetry", "ghcr.io/devcontainers-contrib/features/bash-command" ], "postCreateCommand": "poetry install --with dev && mkdir -p /home/vscode/.config/k9s", diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 7369ac7cc..6ff698f0f 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -46,7 +46,7 @@ services: ports: - "8000:8000" - "5432:5432" - - "8080:8080" + - "8081:8081" - "5678:5678" - "50051:50051" - "8888:80" @@ -55,7 +55,7 @@ services: image: swaggerapi/swagger-ui environment: SWAGGER_JSON_URL: http://localhost:8000/api/data/spec.json - PORT: "8080" + PORT: "8081" network_mode: service:db authz: diff --git a/Makefile b/Makefile index 6fc32565b..8c49af756 100644 --- a/Makefile +++ b/Makefile @@ -142,6 +142,7 @@ help: ## Display this help. 
.PHONY: k3d_cluster k3d_cluster: ## Creates a k3d cluster for testing + k3d node delete k3d-myregistry.localhost || true k3d cluster delete # k3d registry delete myregistry.localhost || true # k3d registry create myregistry.localhost diff --git a/components/renku_data_services/session/kpack_client.py b/components/renku_data_services/session/kpack_client.py index 3f3c87e06..4b3b111ac 100644 --- a/components/renku_data_services/session/kpack_client.py +++ b/components/renku_data_services/session/kpack_client.py @@ -9,7 +9,7 @@ from renku_data_services.errors.errors import CannotStartBuildError, DeleteBuildError from renku_data_services.notebooks.errors.intermittent import IntermittentError from renku_data_services.notebooks.util.retries import retry_with_exponential_backoff_async -from renku_data_services.session.crs import KpackBuild, KpackImage +from renku_data_services.session.kpack_crs import Build, Image class KpackImageV1Alpha2Kr8s(APIObject): @@ -43,7 +43,7 @@ def __init__(self, namespace: str) -> None: self.namespace = namespace self.sanitize = ApiClient().sanitize_for_serialization - async def create_build(self, manifest: KpackBuild) -> KpackBuild: + async def create_build(self, manifest: Build) -> Build: """Create a new image build.""" manifest.metadata.namespace = self.namespace build = await KpackBuildV1Alpha2Kr8s(manifest.model_dump(exclude_none=True, mode="json")) @@ -59,33 +59,29 @@ async def create_build(self, manifest: KpackBuild) -> KpackBuild: raise CannotStartBuildError(message=f"Cannot create the image build {build_name}") return build_resource - async def get_build(self, name: str) -> KpackBuild | None: + async def get_build(self, name: str) -> Build | None: """Get an image build.""" try: build = await KpackBuildV1Alpha2Kr8s.get(name=name, namespace=self.namespace) except NotFoundError: return None except ServerError as e: - if e.status not in [400, 404]: + if not e.response or e.response.status_code not in [400, 404]: logging.exception(f"Cannot get the 
build {name} because of {e}") raise IntermittentError(f"Cannot get build {name} from the k8s API.") return None - return KpackBuild.model_validate(build.to_dict()) + return Build.model_validate(build.to_dict()) - async def list_builds(self, label_selector: str | None = None) -> list[KpackBuild]: + async def list_builds(self, label_selector: str | None = None) -> list[Build]: """Get a list of kpack builds.""" try: builds = await KpackBuildV1Alpha2Kr8s.list(namespace=self.namespace, label_selector=label_selector) except ServerError as e: - if e.status not in [400, 404]: + if not e.response or e.response.status_code not in [400, 404]: logging.exception(f"Cannot list builds because of {e}") raise IntermittentError("Cannot list builds") return [] - output: list[KpackBuild] - if isinstance(builds, APIObject): - output = [KpackBuild.model_validate(builds.to_dict())] - else: - output = [KpackBuild.model_validate(b.to_dict()) for b in builds] + output = [Build.model_validate(b.to_dict()) for b in builds] return output async def delete_build(self, name: str) -> None: @@ -98,7 +94,7 @@ async def delete_build(self, name: str) -> None: raise DeleteBuildError() return None - async def create_image(self, manifest: KpackImage) -> KpackImage: + async def create_image(self, manifest: Image) -> Image: """Create a new image image.""" manifest.metadata.namespace = self.namespace image = await KpackImageV1Alpha2Kr8s(manifest.model_dump(exclude_none=True, mode="json")) @@ -114,33 +110,29 @@ async def create_image(self, manifest: KpackImage) -> KpackImage: raise CannotStartBuildError(message=f"Cannot create the kpack image {image_name}") return image_resource - async def get_image(self, name: str) -> KpackImage | None: + async def get_image(self, name: str) -> Image | None: """Get an image image.""" try: image = await KpackImageV1Alpha2Kr8s.get(name=name, namespace=self.namespace) except NotFoundError: return None except ServerError as e: - if e.status not in [400, 404]: + if not 
e.response or e.response.status_code not in [400, 404]: logging.exception(f"Cannot get the image {name} because of {e}") raise IntermittentError(f"Cannot get image {name} from the k8s API.") return None - return KpackImage.model_validate(image.to_dict()) + return Image.model_validate(image.to_dict()) - async def list_images(self, label_selector: str | None = None) -> list[KpackImage]: + async def list_images(self, label_selector: str | None = None) -> list[Image]: """Get a list of kpack images.""" try: images = await KpackImageV1Alpha2Kr8s.list(namespace=self.namespace, label_selector=label_selector) except ServerError as e: - if e.status not in [400, 404]: + if not e.response or e.response.status_code not in [400, 404]: logging.exception(f"Cannot list images because of {e}") raise IntermittentError("Cannot list images") return [] - output: list[KpackImage] - if isinstance(images, APIObject): - output = [KpackImage.model_validate(images.to_dict())] - else: - output = [KpackImage.model_validate(b.to_dict()) for b in images] + output = [Image.model_validate(b.to_dict()) for b in images] return output async def delete_image(self, name: str) -> None: diff --git a/components/renku_data_services/session/crs.py b/components/renku_data_services/session/kpack_crs.py similarity index 79% rename from components/renku_data_services/session/crs.py rename to components/renku_data_services/session/kpack_crs.py index 8ecd5c353..3f1f0ba40 100644 --- a/components/renku_data_services/session/crs.py +++ b/components/renku_data_services/session/kpack_crs.py @@ -1,4 +1,4 @@ -"""Custom Resources for environments, mainly kpack.""" +"""Custom Resources for kpack environments.""" from datetime import datetime from typing import Self @@ -56,7 +56,7 @@ class PersistentVolumeReference(BaseModel): persistentVolumeClaimName: str -class KpackBuilderReference(BaseModel): +class BuilderReference(BaseModel): """Refernce to Kpack builder.""" name: str @@ -75,54 +75,54 @@ class 
DockerImageWithSecret(DockerImage): imagePullSecrets: list[ImagePullSecret] -class KpackGitSource(BaseModel): +class GitSource(BaseModel): """Git repository source.""" url: str revision: str -class KpackBlobSource(BaseModel): +class BlobSource(BaseModel): """Blob/file archive source.""" url: str stripComponents: str -class KpackSource(BaseModel): +class Source(BaseModel): """Kpack files source resource.""" - git: KpackGitSource | None = None - blob: KpackBlobSource | None = None + git: GitSource | None = None + blob: BlobSource | None = None @model_validator(mode="after") - def validate(self) -> Self: + def validate_source(self) -> Self: """Validate mode data.""" if bool(self.git) == bool(self.blob): raise ValueError("'git' and 'blob' are mutually exclusive and one of them must be set.") return self -class KpackBuildCustomization(BaseModel): +class BuildCustomization(BaseModel): """Customization of a kpack build.""" env: list[EnvItem] -class KpackImageSpec(BaseModel): +class ImageSpec(BaseModel): """KPack image spec model.""" tag: str additionalTags: list[str] serviceAccountName: str - builder: KpackBuilderReference - source: KpackSource - build: KpackBuildCustomization + builder: BuilderReference + source: Source + build: BuildCustomization successBuildHistoryLimit: int = 1 failedBuildHistoryLimit: int = 1 -class KpackImage(BaseModel): +class Image(BaseModel): """Kpack Image resource.""" model_config = ConfigDict( @@ -131,10 +131,10 @@ class KpackImage(BaseModel): kind: str = "Image" apiVersion: str = "kpack.io/v1alpha2" metadata: Metadata - spec: KpackImageSpec + spec: ImageSpec -class KpackVolumeCache(BaseModel): +class VolumeCache(BaseModel): """Persistent volume to serve as cache for kpack build.""" volume: PersistentVolumeReference @@ -146,27 +146,27 @@ class ImageTagReference(BaseModel): tag: str -class KpackCacheImage(BaseModel): +class CacheImage(BaseModel): """Image definition to use as build cache.""" registry: ImageTagReference -class 
KpackBuildSpec(BaseModel): +class BuildSpec(BaseModel): """Spec for kpack build.""" builder: DockerImageWithSecret - cache: KpackVolumeCache | KpackCacheImage + cache: VolumeCache | CacheImage env: list[EnvItem] resources: K8sResourceRequest runImage: DockerImage serviceAccountName: str - source: KpackSource + source: Source tags: list[str] activeDeadlineSeconds: int = 1800 -class KpackBuild(BaseModel): +class Build(BaseModel): """KPack build resource.""" model_config = ConfigDict( @@ -175,4 +175,4 @@ class KpackBuild(BaseModel): kind: str = "Build" apiVersion: str = "kpack.io/v1alpha2" metadata: Metadata - spec: KpackBuildSpec + spec: BuildSpec diff --git a/components/renku_data_services/session/shipwright_client.py b/components/renku_data_services/session/shipwright_client.py new file mode 100644 index 000000000..a0f4499eb --- /dev/null +++ b/components/renku_data_services/session/shipwright_client.py @@ -0,0 +1,148 @@ +"""K8s client for shipwright.""" + +import logging + +from kr8s import NotFoundError, ServerError +from kr8s.asyncio.objects import APIObject +from kubernetes.client import ApiClient + +from renku_data_services.errors.errors import CannotStartBuildError, DeleteBuildError +from renku_data_services.notebooks.errors.intermittent import IntermittentError +from renku_data_services.notebooks.util.retries import retry_with_exponential_backoff_async +from renku_data_services.session.shipwright_crs import Build, BuildRun + + +class ShipwrightBuildV1Beta2Kr8s(APIObject): + """Spec for shipwright build used by the k8s client.""" + + kind: str = "Build" + version: str = "shipwright.io/v1beta2" + namespaced: bool = True + plural: str = "builds" + singular: str = "build" + scalable: bool = False + endpoint: str = "builds" + + +class ShipwrightBuildRunV1Beta2Kr8s(APIObject): + """Spec for shipwright build run used by the k8s client.""" + + kind: str = "BuildRun" + version: str = "shipwright.io/v1beta2" + namespaced: bool = True + plural: str = "buildruns" + singular: 
str = "buildrun" + scalable: bool = False + endpoint: str = "buildruns" + + +class ShipwrightClient: + """Client for creating shipwright resources in kubernetes.""" + + def __init__(self, namespace: str) -> None: + self.namespace = namespace + self.sanitize = ApiClient().sanitize_for_serialization + + async def create_build(self, manifest: Build) -> Build: + """Create a new build.""" + manifest.metadata.namespace = self.namespace + build = await ShipwrightBuildV1Beta2Kr8s(manifest.model_dump(exclude_none=True, mode="json")) + build_name = manifest.metadata.name + try: + await build.create() + except ServerError as e: + logging.exception(f"Cannot create the image build {build_name} because of {e}") + raise CannotStartBuildError(message=f"Cannot create the image build {build_name}") + await build.refresh() + build_resource = await retry_with_exponential_backoff_async(lambda x: x is None)(self.get_build)(build_name) + if build_resource is None: + raise CannotStartBuildError(message=f"Cannot create the image build {build_name}") + return build_resource + + async def get_build(self, name: str) -> Build | None: + """Get an image build.""" + try: + build = await ShipwrightBuildV1Beta2Kr8s.get(name=name, namespace=self.namespace) + except NotFoundError: + return None + except ServerError as e: + if not e.response or e.response.status_code not in [400, 404]: + logging.exception(f"Cannot get the build {name} because of {e}") + raise IntermittentError(f"Cannot get build {name} from the k8s API.") + return None + return Build.model_validate(build.to_dict()) + + async def list_builds(self, label_selector: str | None = None) -> list[Build]: + """Get a list of shipwright builds.""" + try: + builds = await ShipwrightBuildV1Beta2Kr8s.list(namespace=self.namespace, label_selector=label_selector) + except ServerError as e: + if not e.response or e.response.status_code not in [400, 404]: + logging.exception(f"Cannot list builds because of {e}") + raise IntermittentError("Cannot list 
builds") + return [] + output = [Build.model_validate(b.to_dict()) for b in builds] + return output + + async def delete_build(self, name: str) -> None: + """Delete a shipwright build.""" + build = await ShipwrightBuildV1Beta2Kr8s(dict(metadata=dict(name=name, namespace=self.namespace))) + try: + await build.delete(propagation_policy="Foreground") + except ServerError as e: + logging.exception(f"Cannot delete build {name} because of {e}") + raise DeleteBuildError() + return None + + async def create_build_run(self, manifest: BuildRun) -> BuildRun: + """Create a new build run.""" + manifest.metadata.namespace = self.namespace + build_run = await ShipwrightBuildRunV1Beta2Kr8s(manifest.model_dump(exclude_none=True, mode="json")) + build_run_name = manifest.metadata.name + try: + await build_run.create() + except ServerError as e: + logging.exception(f"Cannot create the image build {build_run_name} because of {e}") + raise CannotStartBuildError(message=f"Cannot create the image build {build_run_name}") + await build_run.refresh() + build_resource = await retry_with_exponential_backoff_async(lambda x: x is None)(self.get_build_run)( + build_run_name + ) + if build_resource is None: + raise CannotStartBuildError(message=f"Cannot create the image build {build_run_name}") + return build_resource + + async def get_build_run(self, name: str) -> BuildRun | None: + """Get an image build run.""" + try: + build = await ShipwrightBuildRunV1Beta2Kr8s.get(name=name, namespace=self.namespace) + except NotFoundError: + return None + except ServerError as e: + if not e.response or e.response.status_code not in [400, 404]: + logging.exception(f"Cannot get the build {name} because of {e}") + raise IntermittentError(f"Cannot get build {name} from the k8s API.") + return None + return BuildRun.model_validate(build.to_dict()) + + async def list_build_runs(self, label_selector: str | None = None) -> list[BuildRun]: + """Get a list of shipwright build runs.""" + try: + builds = await 
ShipwrightBuildRunV1Beta2Kr8s.list(namespace=self.namespace, label_selector=label_selector) + except ServerError as e: + if not e.response or e.response.status_code not in [400, 404]: + logging.exception(f"Cannot list builds because of {e}") + raise IntermittentError("Cannot list builds") + return [] + output = [BuildRun.model_validate(b.to_dict()) for b in builds] + return output + + async def delete_build_run(self, name: str) -> None: + """Delete a shipwright build run.""" + build = await ShipwrightBuildRunV1Beta2Kr8s(dict(metadata=dict(name=name, namespace=self.namespace))) + try: + await build.delete(propagation_policy="Foreground") + except ServerError as e: + logging.exception(f"Cannot delete build {name} because of {e}") + raise DeleteBuildError() + return None diff --git a/components/renku_data_services/session/shipwright_crs.py b/components/renku_data_services/session/shipwright_crs.py new file mode 100644 index 000000000..d9f48f781 --- /dev/null +++ b/components/renku_data_services/session/shipwright_crs.py @@ -0,0 +1,149 @@ +"""Custom Resources for shipwright environments.""" + +from datetime import datetime + +from pydantic import BaseModel, ConfigDict, Field + + +class Metadata(BaseModel): + """Basic k8s metadata spec.""" + + class Config: + """Do not exclude unknown properties.""" + + extra = "allow" + + name: str + namespace: str | None = None + labels: dict[str, str] = Field(default_factory=dict) + annotations: dict[str, str] = Field(default_factory=dict) + uid: str | None = None + creationTimestamp: datetime | None = None + deletionTimestamp: datetime | None = None + + +class EnvItem(BaseModel): + """Environment variable definition.""" + + name: str + value: str + + +class ResourceRequest(BaseModel): + """Resource request entry.""" + + cpu: str + memory: str + + +class K8sResourceRequest(BaseModel): + """K8s resource request.""" + + requests: ResourceRequest + limits: ResourceRequest + + +class ImagePullSecret(BaseModel): + """K8s image pull 
secret.""" + + name: str + + +class StrategyRef(BaseModel): + """Reference to a shipwright build strategy.""" + + name: str + kind: str = "ClusterBuildStrategy" + + +class BuildOutput(BaseModel): + """Defines the output of a shipwright build.""" + + image: str + pushSecret: str | None = None + + +class ConfigMapRef(BaseModel): + """A reference to a value in a config map.""" + + name: str + key: str + + +class ParamValue(BaseModel): + """A value for a build strategy parameter.""" + + name: str + value: str | None = None + configMapValue: ConfigMapRef | None = None + + +class GitRef(BaseModel): + """A reference to a git repo.""" + + url: str + revision: str + cloneSecret: str + + +class GitSource(BaseModel): + """A git repo to use as source for a shipwright build.""" + + type: str = "Git" + git: GitRef + contextDir: str + + +class Retention(BaseModel): + """Retention Policy.""" + + ttlAfterFailed: str = "1440m" + ttlAfterSucceeded: str = "60m" + failedLimit: int = 1 + succeededLimit: int = 1 + + +class BuildSpec(BaseModel): + """Shipwright build spec.""" + + source: GitSource + paramValues: list[ParamValue] + strategy: StrategyRef + output: BuildOutput + retention: Retention + + +class Build(BaseModel): + """A shipwright build.""" + + model_config = ConfigDict( + extra="allow", + ) + kind: str = "Build" + apiVersion: str = "shipwright.io/v1beta2" + metadata: Metadata + spec: BuildSpec + + +class BuildRef(BuildSpec): + """Reference to a build.""" + + name: str + + +class BuildRunSpec(BaseModel): + """Spec for a build run.""" + + build: BuildRef + + +class BuildRun(BaseModel): + """A shipwright build run.""" + + model_config = ConfigDict( + extra="allow", + ) + kind: str = "BuildRun" + apiVersion: str = "shipwright.io/v1beta2" + metadata: Metadata + spec: BuildRunSpec