From f2d1e7a40144b277a59f40b5cb3dea2f88c6c38c Mon Sep 17 00:00:00 2001 From: Yehudit Kerido Date: Thu, 23 Jan 2025 10:03:50 +0200 Subject: [PATCH 1/2] sdk-tests-with-papermill Signed-off-by: Yehudit Kerido --- .../sdk-e2e-tests-with-papermill.yaml | 28 +++ .../template-e2e-notebook-test/action.yaml | 54 +++++ .../sdk/cmaes-and-resume-policies.ipynb | 223 +++++++++++++++--- .../v1beta1/sdk/tune-train-from-func.ipynb | 191 ++++++++++++--- .../v1beta1/scripts/gh-actions/build-load.sh | 21 +- .../scripts/gh-actions/run-notebook.sh | 71 ++++++ .../scripts/gh-actions/setup-minikube.sh | 3 +- 7 files changed, 514 insertions(+), 77 deletions(-) create mode 100644 .github/workflows/sdk-e2e-tests-with-papermill.yaml create mode 100644 .github/workflows/template-e2e-notebook-test/action.yaml create mode 100644 test/e2e/v1beta1/scripts/gh-actions/run-notebook.sh diff --git a/.github/workflows/sdk-e2e-tests-with-papermill.yaml b/.github/workflows/sdk-e2e-tests-with-papermill.yaml new file mode 100644 index 00000000000..b5e976c0181 --- /dev/null +++ b/.github/workflows/sdk-e2e-tests-with-papermill.yaml @@ -0,0 +1,28 @@ +name: E2E Tests with Notebooks + +on: + push: {} + pull_request: {} + workflow_dispatch: {} + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + e2e: + runs-on: ubuntu-22.04 + timeout-minutes: 30 + strategy: + fail-fast: true + matrix: + python-version: ["3.9", "3.10", "3.11"] + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Test Notebook + uses: ./.github/workflows/template-e2e-notebook-test + with: + python-version: ${{ matrix.python-version }} + notebook-input: "examples/v1beta1/sdk/cmaes-and-resume-policies.ipynb,examples/v1beta1/sdk/tune-train-from-func.ipynb" \ No newline at end of file diff --git a/.github/workflows/template-e2e-notebook-test/action.yaml b/.github/workflows/template-e2e-notebook-test/action.yaml new file mode 100644 index 00000000000..65585f1d933 --- /dev/null +++ 
b/.github/workflows/template-e2e-notebook-test/action.yaml @@ -0,0 +1,54 @@ +name: Notebook test template +description: A composite action to setup and run example notebooks using Papermill + +inputs: + python-version: + required: false + description: Python version + default: "3.9" + notebook-input: + description: 'List of paths to the input notebooks, separated by commas' + required: true + +runs: + using: composite + steps: + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + pip install papermill kubeflow-katib jupyter ipykernel + python -m ipykernel install --user --name python3 --display-name "Python 3" + + - name: Setup Minikube Cluster + uses: medyagh/setup-minikube@v0.0.18 + with: + network-plugin: cni + cni: flannel + driver: none + kubernetes-version: v1.29.2 + minikube-version: 1.34.0 + start-args: --wait-timeout=120s + + - name: Setup Minikube + shell: bash + run: ./test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh true true "" "" "cmaes" + + - name: Setup Katib + shell: bash + run: ./test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh true true postgres + + - name: Run Jupyter Notebook with Papermill + shell: bash + run: | + IFS=',' read -r -a NOTEBOOK_ARRAY <<< "${{ inputs.notebook-input }}" + for NOTEBOOK in "${NOTEBOOK_ARRAY[@]}"; do + echo "Running notebook: $NOTEBOOK" + ./test/e2e/v1beta1/scripts/gh-actions/run-notebook.sh -i "$NOTEBOOK" -n kubeflow + done diff --git a/examples/v1beta1/sdk/cmaes-and-resume-policies.ipynb b/examples/v1beta1/sdk/cmaes-and-resume-policies.ipynb index d6876f2b864..a23933a14ed 100644 --- a/examples/v1beta1/sdk/cmaes-and-resume-policies.ipynb +++ b/examples/v1beta1/sdk/cmaes-and-resume-policies.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# HyperParameter 
tunning using CMA-ES\n", "\n", @@ -17,7 +21,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Install Katib SDK\n", "\n", @@ -27,7 +35,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# TODO (andreyvelich): Change to release version when SDK with the new APIs is published.\n", @@ -36,7 +48,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Import required packages" ] @@ -44,7 +60,11 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import copy\n", @@ -64,7 +84,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Define your Experiment\n", "\n", @@ -73,13 +97,43 @@ }, { "cell_type": "code", - "execution_count": 46, - "metadata": {}, + "execution_count": null, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "parameters" + ], + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Experiment name and namespace.\n", "namespace = \"kubeflow-user-example-com\"\n", - "experiment_name = \"cmaes-example\"\n", + "experiment_name = \"cmaes-example\"" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "parameters" + ], + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ "\n", "metadata = V1ObjectMeta(\n", " name=experiment_name,\n", @@ -188,7 +242,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Define Experiments with resume policy\n", "\n", @@ -202,7 +260,11 @@ { "cell_type": 
"code", "execution_count": 47, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "experiment_never_resume_name = \"never-resume-cmaes\"\n", @@ -224,7 +286,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "You can print the Experiment's info to verify it before submission." ] @@ -233,7 +299,10 @@ "cell_type": "code", "execution_count": 48, "metadata": { - "scrolled": true + "scrolled": true, + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -264,7 +333,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Create your Experiment\n", "\n", @@ -274,7 +347,16 @@ { "cell_type": "code", "execution_count": 49, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [], + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -298,7 +380,7 @@ ], "source": [ "# Create Katib client.\n", - "kclient = KatibClient()\n", + "kclient = KatibClient(namespace=namespace)\n", "\n", "# Create your Experiment.\n", "kclient.create_experiment(experiment,namespace=namespace)" @@ -306,7 +388,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Create other Experiments" ] @@ -314,7 +400,11 @@ { "cell_type": "code", "execution_count": 50, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -364,7 +454,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Get your Experiment\n", "\n", @@ -375,7 +469,10 @@ "cell_type": "code", "execution_count": 51, "metadata": { - "scrolled": true + "scrolled": true, + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -585,7 +682,11 @@ }, { "cell_type": 
"markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Get all Experiments\n", "\n", @@ -595,7 +696,11 @@ { "cell_type": "code", "execution_count": 52, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -617,7 +722,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Get the current Experiment conditions\n", "\n", @@ -627,7 +736,11 @@ { "cell_type": "code", "execution_count": 53, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -659,7 +772,10 @@ "cell_type": "code", "execution_count": 54, "metadata": { - "scrolled": true + "scrolled": true, + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -679,7 +795,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## List of the current Trials\n", "\n", @@ -690,7 +810,10 @@ "cell_type": "code", "execution_count": 55, "metadata": { - "scrolled": true + "scrolled": true, + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -733,7 +856,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Get the optimal HyperParameters\n", "\n", @@ -743,7 +870,11 @@ { "cell_type": "code", "execution_count": 56, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -774,7 +905,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Status for the Suggestion objects\n", "\n", @@ -786,7 +921,11 @@ { "cell_type": "code", "execution_count": 59, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -813,7 +952,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + 
"metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Delete your Experiments\n", "\n", @@ -823,7 +966,11 @@ { "cell_type": "code", "execution_count": 61, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -844,7 +991,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [] } @@ -865,9 +1016,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/examples/v1beta1/sdk/tune-train-from-func.ipynb b/examples/v1beta1/sdk/tune-train-from-func.ipynb index e47ed47d0e7..a6e3b14f89b 100644 --- a/examples/v1beta1/sdk/tune-train-from-func.ipynb +++ b/examples/v1beta1/sdk/tune-train-from-func.ipynb @@ -4,7 +4,10 @@ "cell_type": "markdown", "id": "bf9ab16d-fbf6-4385-a7f8-133e4562e1e7", "metadata": { - "tags": [] + "tags": [], + "pycharm": { + "name": "#%% md\n" + } }, "source": [ "# Tune and Train with Kubeflow Katib and Training Operator\n", @@ -19,7 +22,11 @@ { "cell_type": "markdown", "id": "62d91e3d-904a-4a3c-b4e7-573324ba625e", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Install Kubeflow Python SDKs\n", "\n", @@ -30,21 +37,50 @@ "cell_type": "code", "execution_count": null, "id": "5de885ca-e96a-4d59-9e78-75f6fc6f5ce7", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "!pip install tensorflow==2.9.1\n", + "!pip install tensorflow==2.16.1\n", "\n", "# TODO (andreyvelich): Change to release version when SDK with the new APIs is published.\n", "!pip install git+https://github.com/kubeflow/katib.git#subdirectory=sdk/python/v1beta1\n", "!pip install git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python" ] }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "15ea90f2-ad99-4bf4-ae07-cc35bcbe6884", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "parameters" + ], + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Experiment namespace\n", + "namespace = \"default\" " + ] + }, { "cell_type": "markdown", "id": "881aae2f-d08e-4439-bef9-1684ff87556d", "metadata": { - "tags": [] + "tags": [], + "pycharm": { + "name": "#%% md\n" + } }, "source": [ "## Create Train Script for CNN Model\n", @@ -64,7 +100,10 @@ "shell.execute_reply": "2022-09-12T18:46:59.248292Z", "shell.execute_reply.started": "2022-09-12T18:46:59.051211Z" }, - "tags": [] + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [], "source": [ @@ -168,7 +207,10 @@ "cell_type": "markdown", "id": "38c81974-0ea6-480e-844d-03ad02f068fa", "metadata": { - "tags": [] + "tags": [], + "pycharm": { + "name": "#%% md\n" + } }, "source": [ "## Run Training Locally in the Notebook\n", @@ -190,7 +232,10 @@ "shell.execute_reply": "2022-09-12T18:50:45.645056Z", "shell.execute_reply.started": "2022-09-12T18:47:19.717438Z" }, - "tags": [] + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -254,7 +299,10 @@ "cell_type": "markdown", "id": "05c878de-718f-4f5b-af52-7cb222f43dfd", "metadata": { - "tags": [] + "tags": [], + "pycharm": { + "name": "#%% md\n" + } }, "source": [ "## Start Model Tuning with Katib\n", @@ -271,6 +319,7 @@ "execution_count": 13, "id": "c1e479e7-7fe1-4d72-91ba-4d8e55321882", "metadata": { + "editable": true, "execution": { "iopub.execute_input": "2022-09-12T19:01:45.433380Z", "iopub.status.busy": "2022-09-12T19:01:45.432517Z", @@ -278,7 +327,13 @@ "shell.execute_reply": "2022-09-12T19:01:45.545424Z", "shell.execute_reply.started": "2022-09-12T19:01:45.433308Z" }, - "tags": [] + "slideshow": { + "slide_type": "" + }, + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -314,7 +369,7 @@ 
"\n", "# Start the Katib Experiment.\n", "exp_name = \"tune-mnist\"\n", - "katib_client = katib.KatibClient()\n", + "katib_client = katib.KatibClient(namespace=namespace)\n", "\n", "katib_client.tune(\n", " name=exp_name,\n", @@ -344,7 +399,10 @@ "shell.execute_reply": "2022-09-01T15:51:29.443320Z", "shell.execute_reply.started": "2022-09-01T15:51:29.342591Z" }, - "tags": [] + "tags": [], + "pycharm": { + "name": "#%% md\n" + } }, "source": [ "### Access to Katib UI\n", @@ -357,7 +415,11 @@ { "cell_type": "markdown", "id": "8e64470f-e80d-4230-bef1-ce31c81fc910", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Get the Best HyperParameters from the Katib Experiment\n", "\n", @@ -369,6 +431,7 @@ "execution_count": 14, "id": "c01fba11-3627-4a6d-88a6-3f1508637934", "metadata": { + "editable": true, "execution": { "iopub.execute_input": "2022-09-12T19:08:48.997327Z", "iopub.status.busy": "2022-09-12T19:08:48.995976Z", @@ -376,7 +439,13 @@ "shell.execute_reply": "2022-09-12T19:08:49.143625Z", "shell.execute_reply.started": "2022-09-12T19:08:48.997288Z" }, - "tags": [] + "slideshow": { + "slide_type": "" + }, + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -402,10 +471,11 @@ } ], "source": [ - "status = katib_client.is_experiment_succeeded(exp_name)\n", + "katib_client.wait_for_experiment_condition(exp_name, namespace=namespace)\n", + "status = katib_client.is_experiment_succeeded(exp_name, namespace=namespace)\n", "print(f\"Katib Experiment is Succeeded: {status}\\n\")\n", "\n", - "best_hps = katib_client.get_optimal_hyperparameters(exp_name)\n", + "best_hps = katib_client.get_optimal_hyperparameters(exp_name, namespace=namespace)\n", "\n", "if best_hps != None:\n", " print(\"Current Optimal Trial\\n\")\n", @@ -422,7 +492,10 @@ "cell_type": "markdown", "id": "1ee6b88d-6dfa-4924-8c67-86e019c78012", "metadata": { - "tags": [] + "tags": [], + "pycharm": { + "name": "#%% md\n" + } }, "source": [ "## Start 
Distributive Training with TFJob\n", @@ -435,6 +508,7 @@ "execution_count": 15, "id": "6289e27f-325d-4433-9379-7e97bc8aae69", "metadata": { + "editable": true, "execution": { "iopub.execute_input": "2022-09-12T19:08:51.055746Z", "iopub.status.busy": "2022-09-12T19:08:51.054605Z", @@ -442,7 +516,13 @@ "shell.execute_reply": "2022-09-12T19:08:51.244919Z", "shell.execute_reply.started": "2022-09-12T19:08:51.055713Z" }, - "tags": [] + "slideshow": { + "slide_type": "" + }, + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -454,7 +534,7 @@ } ], "source": [ - "from kubeflow.training import TFJobClient\n", + "from kubeflow.training import TrainingClient\n", "\n", "# Set Parameters for Distributed Training with TFJob.\n", "parameters = {\n", @@ -466,20 +546,28 @@ "\n", "# Start TFJob Training.\n", "tfjob_name = \"train-mnist\"\n", - "tfjob_client = TFJobClient()\n", + "tfjob_client = TrainingClient(namespace=namespace)\n", "\n", - "tfjob_client.create_tfjob_from_func(\n", + "#create_tfjob_from_func\n", + "tfjob_client.create_job(\n", " name=tfjob_name,\n", - " func=train_mnist_model,\n", + " namespace=namespace,\n", + " job_kind=\"TFJob\",\n", + " train_func=train_mnist_model,\n", " parameters=parameters, # Input parameters for the train function.\n", - " num_worker_replicas=5, # How many TFJob Workers will be run.\n", + " num_workers=5, # How many TFJob Workers will be run.\n", + " base_image=\"tensorflow/tensorflow:2.10.0\", # Use TensorFlow image\n", ")" ] }, { "cell_type": "markdown", "id": "d5d465e8-0310-4c72-ad36-209259ad5c34", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Get TFJob Status and Training Logs\n", "\n", @@ -491,6 +579,7 @@ "execution_count": 16, "id": "53859cf4-7a35-4fc4-b5ee-9ba774635df0", "metadata": { + "editable": true, "execution": { "iopub.execute_input": "2022-09-12T19:10:06.862146Z", "iopub.status.busy": "2022-09-12T19:10:06.861177Z", @@ -498,7 +587,13 @@ 
"shell.execute_reply": "2022-09-12T19:10:06.943629Z", "shell.execute_reply.started": "2022-09-12T19:10:06.862104Z" }, - "tags": [] + "slideshow": { + "slide_type": "" + }, + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -510,7 +605,7 @@ } ], "source": [ - "print(f\"TFJob status: {tfjob_client.get_job_status(tfjob_name)}\")" + "print(f\"TFJob status: {tfjob_client.get_job_conditions(tfjob_name, namespace=namespace, job_kind='TFJob')}\")" ] }, { @@ -518,6 +613,7 @@ "execution_count": 17, "id": "f247670e-0bd4-4336-a40c-605ce32fad23", "metadata": { + "editable": true, "execution": { "iopub.execute_input": "2022-09-12T19:10:11.765592Z", "iopub.status.busy": "2022-09-12T19:10:11.764384Z", @@ -525,7 +621,13 @@ "shell.execute_reply": "2022-09-12T19:10:14.248518Z", "shell.execute_reply.started": "2022-09-12T19:10:11.765560Z" }, - "tags": [] + "slideshow": { + "slide_type": "" + }, + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -659,7 +761,7 @@ } ], "source": [ - "tfjob_client.get_logs(name=tfjob_name, master=True, follow=True)" + "tfjob_client.get_job_logs(name=tfjob_name, namespace=namespace, is_master=True, follow=True, job_kind=\"TFJob\")" ] }, { @@ -672,6 +774,9 @@ "iopub.status.idle": "2022-08-09T23:50:29.599222Z", "shell.execute_reply": "2022-08-09T23:50:29.598674Z", "shell.execute_reply.started": "2022-08-09T23:50:29.596363Z" + }, + "pycharm": { + "name": "#%% md\n" } }, "source": [ @@ -692,7 +797,10 @@ "shell.execute_reply": "2022-09-12T19:10:19.143396Z", "shell.execute_reply.started": "2022-09-12T19:10:19.053607Z" }, - "tags": [] + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -712,6 +820,7 @@ "execution_count": 19, "id": "025fa4af-256d-4027-99ba-ba44c1409541", "metadata": { + "editable": true, "execution": { "iopub.execute_input": "2022-09-12T19:10:19.532471Z", "iopub.status.busy": "2022-09-12T19:10:19.531949Z", @@ -719,7 +828,13 @@ "shell.execute_reply": "2022-09-12T19:10:19.549103Z", 
"shell.execute_reply.started": "2022-09-12T19:10:19.532441Z" }, - "tags": [] + "slideshow": { + "slide_type": "" + }, + "tags": [], + "pycharm": { + "name": "#%%\n" + } }, "outputs": [ { @@ -731,23 +846,27 @@ } ], "source": [ - "tfjob_client.delete(tfjob_name)" + "tfjob_client.delete_job(tfjob_name, namespace=namespace, job_kind=\"TFJob\")" ] }, { "cell_type": "code", "execution_count": null, "id": "e238a638-cf77-423f-a346-f763fc8b1582", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Kubeflow Tensorflow", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "kurek2tw49o8" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -759,9 +878,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.13" + "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh index cb0ea03cd5a..fd77fad35b0 100755 --- a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh +++ b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh @@ -29,6 +29,7 @@ DEPLOY_KATIB_UI=${1:-false} TUNE_API=${2:-false} TRIAL_IMAGES=${3:-""} EXPERIMENTS=${4:-""} +ALGORITHMS=${5:-""} REGISTRY="docker.io/kubeflowkatib" TAG="e2e-test" @@ -38,6 +39,7 @@ SPECIFIED_DEVICE_TYPE_IMAGES=("enas-cnn-cifar10-cpu" "darts-cnn-cifar10-cpu" "py IFS="," read -r -a TRIAL_IMAGE_ARRAY <<< "$TRIAL_IMAGES" IFS="," read -r -a EXPERIMENT_ARRAY <<< "$EXPERIMENTS" +IFS=',' read -r -a ALGORITHM_ARRAY <<< "$ALGORITHMS" _build_containers() { CONTAINER_NAME=${1:-"katib-controller"} @@ -74,18 +76,29 @@ run() { suggestions=() +get_suggestion_image() { + local algorithm_name="$1" + + suggestion_image_name="$(algorithm_name=$algorithm_name yq eval '.runtime.suggestions.[] | select(.algorithmName == 
env(algorithm_name)) | .image' \ + manifests/v1beta1/installs/katib-standalone/katib-config.yaml | cut -d: -f1)" + + suggestion_name="$(basename "$suggestion_image_name")" + suggestions+=("$suggestion_name") +} + # Search for Suggestion Images required for Trial. for exp_name in "${EXPERIMENT_ARRAY[@]}"; do exp_path=$(find examples/v1beta1 -name "${exp_name}.yaml") algorithm_name="$(yq eval '.spec.algorithm.algorithmName' "$exp_path")" - suggestion_image_name="$(algorithm_name=$algorithm_name yq eval '.runtime.suggestions.[] | select(.algorithmName == env(algorithm_name)) | .image' \ - manifests/v1beta1/installs/katib-standalone/katib-config.yaml | cut -d: -f1)" - suggestion_name="$(basename "$suggestion_image_name")" + get_suggestion_image "$algorithm_name" - suggestions+=("$suggestion_name") + done + # Loop through each algorithm in the array + for algorithm_name in "${ALGORITHM_ARRAY[@]}"; do + get_suggestion_image "$algorithm_name" done for s in "${suggestions[@]}"; do diff --git a/test/e2e/v1beta1/scripts/gh-actions/run-notebook.sh b/test/e2e/v1beta1/scripts/gh-actions/run-notebook.sh new file mode 100644 index 00000000000..c0db09f5f9b --- /dev/null +++ b/test/e2e/v1beta1/scripts/gh-actions/run-notebook.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +# Copyright 2024 The Kubeflow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# This bash script is used to run the example notebooks with papermill.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+NOTEBOOK_INPUT=""
+NOTEBOOK_OUTPUT="-" # "-" makes papermill write the executed notebook to the console
+NAMESPACE="default"
+TRAINING_PYTHON_SDK="./sdk/python"
+
+# Print the help text and exit with the given status (defaults to 1).
+usage() {
+  echo "Usage: $0 -i INPUT_NOTEBOOK [-o OUTPUT_NOTEBOOK] [-k TRAINING_SDK] [-n NAMESPACE]"
+  echo "Options:"
+  echo "  -i  Input notebook (required)"
+  echo "  -o  Output notebook (optional, defaults to the console)"
+  echo "  -k  Kubeflow Training Operator Python SDK (optional)"
+  echo "  -n  Kubernetes namespace used by tests (optional)"
+  echo "  -h  Show this help message"
+  echo "NOTE: papermill, jupyter and ipykernel are required Python dependencies to run Notebooks"
+  exit "${1:-1}"
+}
+
+# Declare only the options that are handled below; -h is a flag and takes no argument.
+while getopts "i:o:k:n:h" opt; do
+  case "$opt" in
+    i) NOTEBOOK_INPUT="$OPTARG" ;;      # -i for notebook input path
+    o) NOTEBOOK_OUTPUT="$OPTARG" ;;     # -o for notebook output path
+    k) TRAINING_PYTHON_SDK="$OPTARG" ;; # -k for training operator python sdk
+    n) NAMESPACE="$OPTARG" ;;           # -n for kubernetes namespace used by tests
+    h) usage 0 ;;                       # -h for help; asking for help is not an error
+    *) usage ;;                         # unknown option or missing argument
+  esac
+done
+
+if [ -z "$NOTEBOOK_INPUT" ]; then
+  echo "Error: -i notebook input path is required." >&2
+  exit 1
+fi
+
+if ! command -v papermill &> /dev/null; then
+  echo "Error: papermill is not installed. Please install papermill to proceed." >&2
+  exit 1
+fi
+
+# Keep the command in an array so arguments with spaces or glob characters stay intact.
+papermill_cmd=(papermill "$NOTEBOOK_INPUT" "$NOTEBOOK_OUTPUT"
+  -p training_python_sdk "$TRAINING_PYTHON_SDK" -p namespace "$NAMESPACE")
+
+echo "Running command: ${papermill_cmd[*]}"
+# errexit aborts on a failing simple command, so the failure is handled in the same statement.
+"${papermill_cmd[@]}" || { echo "Error: papermill execution failed." >&2; exit 1; }
+
+echo "Notebook execution completed successfully"
diff --git a/test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh b/test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh
index b890a40d41b..abe0f91a772 100755
--- a/test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh
+++ b/test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh
@@ -25,6 +25,7 @@ DEPLOY_KATIB_UI=${1:-false}
 TUNE_API=${2:-false}
 TRIAL_IMAGES=${3:-""}
 EXPERIMENTS=${4:-""}
+ALGORITHMS=${5:-""}
 
 echo "Start to setup Minikube Kubernetes Cluster"
 kubectl version
@@ -32,4 +33,4 @@ kubectl cluster-info
 kubectl get nodes
 
 echo "Build and Load container images"
-./build-load.sh "$DEPLOY_KATIB_UI" "$TUNE_API" "$TRIAL_IMAGES" "$EXPERIMENTS"
+./build-load.sh "$DEPLOY_KATIB_UI" "$TUNE_API" "$TRIAL_IMAGES" "$EXPERIMENTS" "$ALGORITHMS"
From 683608f6a61e7b10218b7084f310af42334d8e65 Mon Sep 17 00:00:00 2001
From: Yehudit Kerido
Date: Thu, 23 Jan 2025 10:16:53 +0200
Subject: [PATCH 2/2] sdk tests with papermill

Signed-off-by: Yehudit Kerido
---
 test/e2e/v1beta1/scripts/gh-actions/run-notebook.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 test/e2e/v1beta1/scripts/gh-actions/run-notebook.sh

diff --git a/test/e2e/v1beta1/scripts/gh-actions/run-notebook.sh b/test/e2e/v1beta1/scripts/gh-actions/run-notebook.sh
old mode 100644
new mode 100755