From db3ba9c4c7551b6a3af7c909889323c54818cab6 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Thu, 16 Jan 2025 02:21:02 +0000 Subject: [PATCH 01/22] Remove Training Operator V1 files Signed-off-by: Andrey Velichkevich --- .gitignore | 44 +- .pre-commit-config.yaml | 12 - CONTRIBUTING.md | 23 + Makefile | 17 +- PROJECT | 38 - ROADMAP.md | 45 +- build/images/kubectl-delivery/Dockerfile | 16 - build/images/training-operator/Dockerfile | 23 - cmd/training-operator.v1/main.go | 263 - cmd/training-operator.v2alpha1/main.go | 4 +- docs/api/autogen/config.yaml | 2 - docs/api/autogen/templates/gv_details.tpl | 20 - docs/api/autogen/templates/gv_list.tpl | 19 - docs/api/autogen/templates/type.tpl | 35 - docs/api/autogen/templates/type_members.tpl | 8 - docs/api/kubeflow.org_v1_generated.asciidoc | 887 -- examples/jax/cpu-demo/Dockerfile | 26 - examples/jax/cpu-demo/demo.yaml | 19 - examples/jax/cpu-demo/train.py | 43 - examples/mpi/tensorflow-mnist-elastic.yaml | 43 - examples/mpi/tensorflow-mnist.yaml | 53 - examples/paddlepaddle/simple-cpu.yaml | 25 - examples/paddlepaddle/simple-gpu.yaml | 35 - examples/pytorch/README.md | 39 - examples/pytorch/cpu-demo/Dockerfile | 7 - examples/pytorch/cpu-demo/README.MD | 7 - examples/pytorch/cpu-demo/demo.py | 11 - examples/pytorch/cpu-demo/demo.yaml | 29 - examples/pytorch/deepspeed-demo/Dockerfile | 11 - examples/pytorch/deepspeed-demo/README.md | 37 - .../pytorch_deepspeed_demo.yaml | 38 - .../pytorch/deepspeed-demo/requirements.txt | 6 - .../pytorch/deepspeed-demo/train_bert_ds.py | 829 -- examples/pytorch/elastic/echo/Dockerfile | 8 - examples/pytorch/elastic/echo/echo.py | 42 - examples/pytorch/elastic/echo/echo.yaml | 28 - examples/pytorch/elastic/etcd.yaml | 74 - .../pytorch/elastic/imagenet/.dockerignore | 1 - examples/pytorch/elastic/imagenet/Dockerfile | 20 - examples/pytorch/elastic/imagenet/imagenet.py | 578 -- .../pytorch/elastic/imagenet/imagenet.yaml | 43 - .../pytorch/fsdp/fine-tune-t5-with-fsdp.ipynb | 509 - 
.../Train-CNN-with-FashionMNIST.ipynb | 658 -- .../create-pytorchjob.ipynb | 557 -- .../train_api_hf_dataset.ipynb | 145 - .../train_api_s3_dataset.ipynb | 162 - examples/pytorch/mnist/Dockerfile | 15 - examples/pytorch/mnist/Dockerfile-mpi | 15 - examples/pytorch/mnist/Makefile | 55 - examples/pytorch/mnist/README.md | 26 - examples/pytorch/mnist/mnist.py | 224 - .../mnist/v1/pytorch_job_mnist_gloo.yaml | 32 - .../mnist/v1/pytorch_job_mnist_mpi.yaml | 32 - .../mnist/v1/pytorch_job_mnist_nccl.yaml | 30 - examples/pytorch/simple.yaml | 33 - examples/pytorch/smoke-dist/Dockerfile | 8 - examples/pytorch/smoke-dist/README.md | 17 - examples/pytorch/smoke-dist/dist_sendrecv.py | 56 - .../smoke-dist/pytorch_job_sendrecv.yaml | 22 - .../Fine-Tune-BERT-LLM.ipynb | 882 -- examples/tensorflow/dist-mnist/Dockerfile | 24 - examples/tensorflow/dist-mnist/README.md | 18 - examples/tensorflow/dist-mnist/dist_mnist.py | 194 - .../tensorflow/dist-mnist/tf_job_mnist.yaml | 32 - .../distribution_strategy/Dockerfile | 11 - .../distribution_strategy/README.md | 29 - .../multi_worker_strategy-with-keras.py | 168 - .../multi_worker_tfjob.yaml | 26 - .../tensorflow/distribution_strategy/pvc.yaml | 13 - .../image-classification/create-tfjob.ipynb | 405 - examples/tensorflow/mnist_utils.py | 140 - .../mnist_with_summaries/Dockerfile | 19 - .../tensorflow/mnist_with_summaries/README.md | 16 - .../mnist_with_summaries.py | 105 - .../mnist_with_summaries/tf_job_mnist.yaml | 30 - .../tfevent-volume/tfevent-pv.yaml | 15 - .../tfevent-volume/tfevent-pvc.yaml | 14 - examples/tensorflow/simple.yaml | 18 - examples/tensorflow/tf_sample/Dockerfile | 5 - examples/tensorflow/tf_sample/Makefile | 38 - examples/tensorflow/tf_sample/setup.py | 26 - examples/tensorflow/tf_sample/tf_smoke.py | 88 - examples/xgboost/lightgbm-dist/Dockerfile | 47 - examples/xgboost/lightgbm-dist/README.md | 203 - examples/xgboost/lightgbm-dist/main.py | 78 - .../xgboost/lightgbm-dist/requirements.txt | 4 - 
examples/xgboost/lightgbm-dist/train.py | 26 - examples/xgboost/lightgbm-dist/utils.py | 91 - .../xgboostjob_v1_lightgbm_dist_training.yaml | 74 - examples/xgboost/smoke-dist/Dockerfile | 14 - examples/xgboost/smoke-dist/README.md | 87 - examples/xgboost/smoke-dist/requirements.txt | 6 - examples/xgboost/smoke-dist/tracker.py | 527 -- .../xgboost/smoke-dist/xgboost_smoke_test.py | 117 - .../smoke-dist/xgboostjob_v1_rabit_test.yaml | 30 - .../xgboostjob_v1alpha1_rabit_test.yaml | 34 - examples/xgboost/xgboost-dist/Dockerfile | 12 - examples/xgboost/xgboost-dist/README.md | 489 - examples/xgboost/xgboost-dist/local_test.py | 102 - examples/xgboost/xgboost-dist/main.py | 81 - examples/xgboost/xgboost-dist/predict.py | 39 - .../xgboost/xgboost-dist/requirements.txt | 10 - examples/xgboost/xgboost-dist/tracker.py | 527 -- examples/xgboost/xgboost-dist/train.py | 97 - examples/xgboost/xgboost-dist/utils.py | 294 - .../xgboostjob_v1_iris_predict.yaml | 40 - .../xgboostjob_v1_iris_predict_local.yaml | 52 - .../xgboostjob_v1_iris_train.yaml | 42 - .../xgboostjob_v1_iris_train_local.yaml | 58 - examples/xgboost/xgboostjob.yaml | 42 - .../e2e-run-notebook.sh | 19 +- .../e2e-setup-cluster.sh | 24 +- hack/generate-apidoc.sh | 39 - hack/python-sdk-v2/gen-sdk.sh | 2 +- hack/python-sdk/gen-sdk.sh | 53 - hack/python-sdk/post_gen.py | 83 - hack/python-sdk/swagger.json | 787 -- hack/python-sdk/swagger_config.json | 8 - hack/scripts/cleanup_clusters.sh | 21 - hack/scripts/update-changelog.sh | 45 - hack/swagger-v2/main.go | 2 + hack/swagger/go.mod | 44 - hack/swagger/go.sum | 136 - hack/swagger/main.go | 84 - hack/update-codegen.sh | 31 +- hack/verify-codegen.sh | 28 +- hack/violation_exception_v1.list | 6 - manifests/base/crds/kubeflow.org_jaxjobs.yaml | 7901 --------------- manifests/base/crds/kubeflow.org_mpijobs.yaml | 7907 ---------------- .../base/crds/kubeflow.org_paddlejobs.yaml | 8394 ---------------- .../base/crds/kubeflow.org_pytorchjobs.yaml | 8431 ----------------- 
manifests/base/crds/kubeflow.org_tfjobs.yaml | 7907 ---------------- .../base/crds/kubeflow.org_xgboostjobs.yaml | 7889 --------------- manifests/base/crds/kustomization.yaml | 9 - manifests/base/deployment.yaml | 64 - manifests/base/kustomization.yaml | 10 - manifests/base/rbac/cluster-role-binding.yaml | 14 - manifests/base/rbac/role.yaml | 292 - manifests/base/rbac/service-account.yaml | 6 - manifests/base/service.yaml | 22 - manifests/base/webhook/kustomization.yaml | 17 - manifests/base/webhook/kustomizeconfig.yaml | 10 - manifests/base/webhook/manifests.yaml | 106 - manifests/base/webhook/patch.yaml | 18 - .../kubeflow/kubeflow-training-roles.yaml | 96 - .../overlays/kubeflow/kustomization.yaml | 15 - .../overlays/standalone/kustomization.yaml | 13 - manifests/overlays/standalone/namespace.yaml | 4 - .../v2/third-party/jobset/manifests.yaml | 130 + pkg/apis/kubeflow.org/v1/common_types.go | 251 - pkg/apis/kubeflow.org/v1/defaulting_utils.go | 63 - pkg/apis/kubeflow.org/v1/doc.go | 22 - pkg/apis/kubeflow.org/v1/groupversion_info.go | 46 - pkg/apis/kubeflow.org/v1/jax_defaults.go | 62 - pkg/apis/kubeflow.org/v1/jax_types.go | 101 - pkg/apis/kubeflow.org/v1/mpi_defaults.go | 39 - pkg/apis/kubeflow.org/v1/mpi_defaults_test.go | 171 - pkg/apis/kubeflow.org/v1/mpi_types.go | 96 - pkg/apis/kubeflow.org/v1/mpi_validation.go | 67 - .../kubeflow.org/v1/mpi_validation_test.go | 95 - .../kubeflow.org/v1/paddlepaddle_defaults.go | 81 - .../kubeflow.org/v1/paddlepaddle_types.go | 134 - pkg/apis/kubeflow.org/v1/pytorch_defaults.go | 96 - .../kubeflow.org/v1/pytorch_defaults_test.go | 192 - pkg/apis/kubeflow.org/v1/pytorch_types.go | 172 - .../kubeflow.org/v1/tensorflow_defaults.go | 72 - .../v1/tensorflow_defaults_test.go | 272 - pkg/apis/kubeflow.org/v1/tensorflow_types.go | 150 - .../kubeflow.org/v1/tensorflow_types_test.go | 45 - .../kubeflow.org/v1/validation_test_utils.go | 25 - pkg/apis/kubeflow.org/v1/xgboost_defaults.go | 63 - 
.../kubeflow.org/v1/xgboost_defaults_test.go | 233 - pkg/apis/kubeflow.org/v1/xgboost_types.go | 87 - .../kubeflow.org/v1/zz_generated.deepcopy.go | 942 -- .../kubeflow.org/v1/zz_generated.defaults.go | 109 - .../kubeflow.org/v1/zz_generated.openapi.go | 1412 --- .../kubeflow.org/v1/elasticpolicy.go | 139 - .../kubeflow.org/v1/jaxjob.go | 223 - .../kubeflow.org/v1/jaxjobspec.go | 56 - .../kubeflow.org/v1/jobcondition.go | 88 - .../kubeflow.org/v1/jobstatus.go | 89 - .../kubeflow.org/v1/mpijob.go | 223 - .../kubeflow.org/v1/mpijobspec.go | 83 - .../kubeflow.org/v1/paddleelasticpolicy.go | 70 - .../kubeflow.org/v1/paddlejob.go | 223 - .../kubeflow.org/v1/paddlejobspec.go | 65 - .../kubeflow.org/v1/pytorchjob.go | 223 - .../kubeflow.org/v1/pytorchjobspec.go | 74 - .../kubeflow.org/v1/rdzvconf.go | 46 - .../kubeflow.org/v1/replicaspec.go | 60 - .../kubeflow.org/v1/replicastatus.go | 77 - .../kubeflow.org/v1/runpolicy.go | 95 - .../kubeflow.org/v1/schedulingpolicy.go | 78 - .../kubeflow.org/v1/tfjob.go | 223 - .../kubeflow.org/v1/tfjobspec.go | 74 - .../kubeflow.org/v1/xgboostjob.go | 223 - .../kubeflow.org/v1/xgboostjobspec.go | 56 - pkg/client/applyconfiguration/utils.go | 48 +- pkg/client/clientset/versioned/clientset.go | 13 - .../versioned/fake/clientset_generated.go | 7 - .../clientset/versioned/fake/register.go | 2 - .../clientset/versioned/scheme/register.go | 2 - .../versioned/typed/kubeflow.org/v1/doc.go | 18 - .../typed/kubeflow.org/v1/fake/doc.go | 18 - .../typed/kubeflow.org/v1/fake/fake_jaxjob.go | 195 - .../v1/fake/fake_kubeflow.org_client.go | 58 - .../typed/kubeflow.org/v1/fake/fake_mpijob.go | 195 - .../kubeflow.org/v1/fake/fake_paddlejob.go | 195 - .../kubeflow.org/v1/fake/fake_pytorchjob.go | 195 - .../typed/kubeflow.org/v1/fake/fake_tfjob.go | 195 - .../kubeflow.org/v1/fake/fake_xgboostjob.go | 195 - .../kubeflow.org/v1/generated_expansion.go | 29 - .../versioned/typed/kubeflow.org/v1/jaxjob.go | 71 - .../kubeflow.org/v1/kubeflow.org_client.go | 130 
- .../versioned/typed/kubeflow.org/v1/mpijob.go | 71 - .../typed/kubeflow.org/v1/paddlejob.go | 71 - .../typed/kubeflow.org/v1/pytorchjob.go | 71 - .../versioned/typed/kubeflow.org/v1/tfjob.go | 71 - .../typed/kubeflow.org/v1/xgboostjob.go | 71 - .../informers/externalversions/generic.go | 17 +- .../kubeflow.org/interface.go | 8 - .../kubeflow.org/v1/interface.go | 78 - .../kubeflow.org/v1/jaxjob.go | 88 - .../kubeflow.org/v1/mpijob.go | 88 - .../kubeflow.org/v1/paddlejob.go | 88 - .../kubeflow.org/v1/pytorchjob.go | 88 - .../externalversions/kubeflow.org/v1/tfjob.go | 88 - .../kubeflow.org/v1/xgboostjob.go | 88 - .../kubeflow.org/v1/expansion_generated.go | 65 - pkg/client/listers/kubeflow.org/v1/jaxjob.go | 68 - pkg/client/listers/kubeflow.org/v1/mpijob.go | 68 - .../listers/kubeflow.org/v1/paddlejob.go | 68 - .../listers/kubeflow.org/v1/pytorchjob.go | 68 - pkg/client/listers/kubeflow.org/v1/tfjob.go | 68 - .../listers/kubeflow.org/v1/xgboostjob.go | 68 - pkg/common/interface.go | 94 - pkg/common/metrics.go | 89 - pkg/common/update.go | 27 - pkg/common/util/fake_workqueue.go | 56 - pkg/common/util/reconciler.go | 198 - pkg/common/util/scheduler.go | 35 - pkg/common/util/util.go | 86 - pkg/common/util/util_test.go | 124 - pkg/common/util/webhooks.go | 32 - pkg/config/config.go | 36 - pkg/controller.v1/common/job.go | 464 - pkg/controller.v1/common/job_controller.go | 263 - pkg/controller.v1/common/job_test.go | 286 - pkg/controller.v1/common/pod.go | 483 - pkg/controller.v1/common/pod_test.go | 228 - pkg/controller.v1/common/scheduling.go | 93 - pkg/controller.v1/common/service.go | 273 - pkg/controller.v1/common/service_test.go | 114 - pkg/controller.v1/common/status.go | 17 - pkg/controller.v1/common/status_test.go | 49 - pkg/controller.v1/common/util.go | 145 - pkg/controller.v1/common/util_test.go | 82 - .../control/controller_ref_manager.go | 380 - .../control/controller_ref_manager_test.go | 373 - pkg/controller.v1/control/pod_control.go | 281 - 
pkg/controller.v1/control/pod_control_test.go | 80 - pkg/controller.v1/control/podgroup_control.go | 199 - pkg/controller.v1/control/service_control.go | 209 - .../control/service_control_test.go | 135 - pkg/controller.v1/control/utils.go | 51 - pkg/controller.v1/expectation/expectation.go | 222 - .../expectation/expectation_test.go | 152 - pkg/controller.v1/expectation/util.go | 15 - pkg/controller.v1/jax/envvar.go | 107 - pkg/controller.v1/jax/envvar_test.go | 138 - pkg/controller.v1/jax/jaxjob_controller.go | 478 - .../jax/jaxjob_controller_suite_test.go | 125 - .../jax/jaxjob_controller_test.go | 316 - pkg/controller.v1/mpi/mpijob.go | 268 - pkg/controller.v1/mpi/mpijob_controller.go | 1400 --- .../mpi/mpijob_controller_test.go | 1150 --- pkg/controller.v1/mpi/suite_test.go | 108 - pkg/controller.v1/paddlepaddle/envvar.go | 172 - .../paddlepaddle/paddlepaddle_controller.go | 521 - .../paddlepaddle_controller_suite_test.go | 127 - .../paddlepaddle_controller_test.go | 514 - pkg/controller.v1/pytorch/elastic.go | 197 - pkg/controller.v1/pytorch/elastic_test.go | 127 - pkg/controller.v1/pytorch/envvar.go | 168 - pkg/controller.v1/pytorch/hpa.go | 102 - pkg/controller.v1/pytorch/initcontainer.go | 136 - .../pytorch/initcontainer_test.go | 110 - pkg/controller.v1/pytorch/master.go | 62 - .../pytorch/pytorchjob_controller.go | 527 -- .../pytorchjob_controller_suite_test.go | 133 - .../pytorch/pytorchjob_controller_test.go | 741 -- pkg/controller.v1/register_controller.go | 83 - pkg/controller.v1/register_controller_test.go | 76 - pkg/controller.v1/tensorflow/job_test.go | 826 -- pkg/controller.v1/tensorflow/pod_test.go | 592 -- pkg/controller.v1/tensorflow/status_test.go | 598 -- pkg/controller.v1/tensorflow/suite_test.go | 143 - pkg/controller.v1/tensorflow/tensorflow.go | 188 - .../tensorflow/tensorflow_test.go | 45 - pkg/controller.v1/tensorflow/testutil/pod.go | 111 - .../tensorflow/testutil/service.go | 86 - .../tensorflow/testutil/tfjob.go | 244 - 
.../tensorflow/tfjob_controller.go | 662 -- .../tensorflow/tfjob_controller_test.go | 671 -- pkg/controller.v1/tensorflow/util.go | 118 - pkg/controller.v1/tensorflow/util_test.go | 63 - pkg/controller.v1/xgboost/suite_test.go | 128 - pkg/controller.v1/xgboost/xgboost.go | 149 - .../xgboost/xgboostjob_controller.go | 467 - .../xgboost/xgboostjob_controller_test.go | 433 - pkg/core/job.go | 133 - pkg/core/pod.go | 88 - pkg/core/service.go | 101 - pkg/core/status.go | 50 - pkg/core/utils.go | 33 - pkg/util/counter.go | 71 - pkg/util/k8sutil/client.go | 101 - pkg/util/k8sutil/k8sutil.go | 145 - pkg/util/labels/labels.go | 59 - pkg/util/labels/labels_test.go | 107 - pkg/util/logger.go | 96 - pkg/util/signals/signal.go | 43 - pkg/util/signals/signal_posix.go | 27 - pkg/util/signals/signal_windows.go | 23 - pkg/util/status.go | 149 - pkg/util/status_test.go | 165 - pkg/util/testutil/constants.go | 19 - pkg/util/train/train_util.go | 30 - pkg/util/train/train_util_test.go | 96 - .../clustertrainingruntime_webhook.go | 2 +- pkg/{webhook.v2 => webhooks.v2}/setup.go | 2 +- .../trainingruntime_webhook.go | 2 +- .../trainingruntime_webhook_test.go | 2 +- .../trainjob_webhook.go | 2 +- pkg/webhooks/jax/jaxjob_webhook.go | 124 - pkg/webhooks/jax/jaxjob_webhook_test.go | 198 - .../paddlepaddle/paddlepaddle_webhook.go | 126 - .../paddlepaddle/paddlepaddle_webhook_test.go | 203 - pkg/webhooks/pytorch/pytorchjob_webhook.go | 161 - .../pytorch/pytorchjob_webhook_test.go | 436 - pkg/webhooks/tensorflow/tfjob_webhook.go | 127 - pkg/webhooks/tensorflow/tfjob_webhook_test.go | 214 - pkg/webhooks/webhooks.go | 45 - pkg/webhooks/xgboost/xgboostjob_webhook.go | 140 - .../xgboost/xgboostjob_webhook_test.go | 265 - prow_config.yaml | 48 - scripts/copy-to-gopath.sh | 31 - scripts/gha/build-image.sh | 24 - scripts/gha/build-jax-mnist-image.sh | 25 - scripts/gha/build-storage-initializer.sh | 24 - scripts/gha/build-trainer.sh | 24 - scripts/setup-tf-operator.sh | 46 - 
scripts/setup-training-operator.sh | 48 - sdk/python/.gitignore | 72 - sdk/python/.openapi-generator-ignore | 33 - sdk/python/.openapi-generator/VERSION | 1 - sdk/python/Dockerfile.conformance | 30 - sdk/python/OWNERS | 2 - sdk/python/README.md | 95 - sdk/python/conformance/run.sh | 11 - sdk/python/docs/KubeflowOrgV1ElasticPolicy.md | 20 - sdk/python/docs/KubeflowOrgV1JAXJob.md | 15 - sdk/python/docs/KubeflowOrgV1JAXJobList.md | 14 - sdk/python/docs/KubeflowOrgV1JAXJobSpec.md | 12 - sdk/python/docs/KubeflowOrgV1JobCondition.md | 16 - sdk/python/docs/KubeflowOrgV1JobStatus.md | 15 - sdk/python/docs/KubeflowOrgV1MPIJob.md | 14 - sdk/python/docs/KubeflowOrgV1MPIJobList.md | 13 - sdk/python/docs/KubeflowOrgV1MPIJobSpec.md | 14 - .../docs/KubeflowOrgV1PaddleElasticPolicy.md | 13 - sdk/python/docs/KubeflowOrgV1PaddleJob.md | 15 - sdk/python/docs/KubeflowOrgV1PaddleJobList.md | 14 - sdk/python/docs/KubeflowOrgV1PaddleJobSpec.md | 13 - sdk/python/docs/KubeflowOrgV1PyTorchJob.md | 15 - .../docs/KubeflowOrgV1PyTorchJobList.md | 14 - .../docs/KubeflowOrgV1PyTorchJobSpec.md | 14 - sdk/python/docs/KubeflowOrgV1RDZVConf.md | 11 - sdk/python/docs/KubeflowOrgV1ReplicaSpec.md | 13 - sdk/python/docs/KubeflowOrgV1ReplicaStatus.md | 15 - sdk/python/docs/KubeflowOrgV1RunPolicy.md | 17 - .../docs/KubeflowOrgV1SchedulingPolicy.md | 15 - sdk/python/docs/KubeflowOrgV1TFJob.md | 15 - sdk/python/docs/KubeflowOrgV1TFJobList.md | 14 - sdk/python/docs/KubeflowOrgV1TFJobSpec.md | 14 - sdk/python/docs/KubeflowOrgV1XGBoostJob.md | 15 - .../docs/KubeflowOrgV1XGBoostJobList.md | 14 - .../docs/KubeflowOrgV1XGBoostJobSpec.md | 12 - sdk/python/docs/RuntimeTypeMeta.md | 12 - sdk/python/docs/RuntimeUnknown.md | 14 - sdk/python/docs/V1APIGroup.md | 16 - sdk/python/docs/V1APIGroupList.md | 13 - sdk/python/docs/V1APIResource.md | 20 - sdk/python/docs/V1APIResourceList.md | 14 - sdk/python/docs/V1APIVersions.md | 14 - sdk/python/docs/V1ApplyOptions.md | 15 - sdk/python/docs/V1Condition.md | 16 - 
sdk/python/docs/V1CreateOptions.md | 15 - sdk/python/docs/V1DeleteOptions.md | 17 - sdk/python/docs/V1FieldSelectorRequirement.md | 13 - sdk/python/docs/V1GetOptions.md | 13 - sdk/python/docs/V1GroupKind.md | 12 - sdk/python/docs/V1GroupResource.md | 12 - sdk/python/docs/V1GroupVersion.md | 12 - sdk/python/docs/V1GroupVersionForDiscovery.md | 12 - sdk/python/docs/V1GroupVersionKind.md | 13 - sdk/python/docs/V1GroupVersionResource.md | 13 - sdk/python/docs/V1InternalEvent.md | 12 - sdk/python/docs/V1LabelSelector.md | 12 - sdk/python/docs/V1LabelSelectorRequirement.md | 13 - sdk/python/docs/V1List.md | 14 - sdk/python/docs/V1ListMeta.md | 14 - sdk/python/docs/V1ListOptions.md | 22 - sdk/python/docs/V1ManagedFieldsEntry.md | 17 - sdk/python/docs/V1ObjectMeta.md | 25 - sdk/python/docs/V1OwnerReference.md | 16 - sdk/python/docs/V1PartialObjectMetadata.md | 13 - .../docs/V1PartialObjectMetadataList.md | 14 - sdk/python/docs/V1PatchOptions.md | 16 - sdk/python/docs/V1Preconditions.md | 12 - sdk/python/docs/V1RootPaths.md | 11 - .../docs/V1ServerAddressByClientCIDR.md | 12 - sdk/python/docs/V1Status.md | 18 - sdk/python/docs/V1StatusCause.md | 13 - sdk/python/docs/V1StatusDetails.md | 16 - sdk/python/docs/V1Table.md | 15 - sdk/python/docs/V1TableColumnDefinition.md | 15 - sdk/python/docs/V1TableOptions.md | 13 - sdk/python/docs/V1TableRow.md | 13 - sdk/python/docs/V1TableRowCondition.md | 14 - sdk/python/docs/V1Timestamp.md | 12 - sdk/python/docs/V1TypeMeta.md | 12 - sdk/python/docs/V1UpdateOptions.md | 15 - sdk/python/docs/V1WatchEvent.md | 12 - sdk/python/docs/VersionInfo.md | 19 - sdk/python/kubeflow/__init__.py | 1 - .../kubeflow/storage_initializer/Dockerfile | 17 - .../kubeflow/storage_initializer/__init__.py | 0 .../abstract_dataset_provider.py | 11 - .../abstract_model_provider.py | 11 - .../kubeflow/storage_initializer/constants.py | 3 - .../storage_initializer/hugging_face.py | 107 - .../storage_initializer/requirements.txt | 4 - 
sdk/python/kubeflow/storage_initializer/s3.py | 73 - .../kubeflow/storage_initializer/storage.py | 51 - sdk/python/kubeflow/trainer/Dockerfile | 17 - sdk/python/kubeflow/trainer/Dockerfile.cpu | 17 - .../kubeflow/trainer/hf_llm_training.py | 205 - sdk/python/kubeflow/trainer/requirements.txt | 4 - sdk/python/kubeflow/training/__init__.py | 59 - sdk/python/kubeflow/training/api/__init__.py | 5 - .../kubeflow/training/api/training_client.py | 1418 --- .../training/api/training_client_test.py | 1777 ---- sdk/python/kubeflow/training/api_client.py | 666 -- sdk/python/kubeflow/training/configuration.py | 376 - .../kubeflow/training/constants/__init__.py | 13 - .../kubeflow/training/constants/constants.py | 209 - sdk/python/kubeflow/training/exceptions.py | 120 - .../kubeflow/training/models/__init__.py | 46 - .../models/kubeflow_org_v1_elastic_policy.py | 392 - .../models/kubeflow_org_v1_jax_job.py | 228 - .../models/kubeflow_org_v1_jax_job_list.py | 205 - .../models/kubeflow_org_v1_jax_job_spec.py | 150 - .../models/kubeflow_org_v1_job_condition.py | 260 - .../models/kubeflow_org_v1_job_status.py | 228 - .../models/kubeflow_org_v1_mpi_job.py | 228 - .../models/kubeflow_org_v1_mpi_job_list.py | 203 - .../models/kubeflow_org_v1_mpi_job_spec.py | 233 - .../kubeflow_org_v1_paddle_elastic_policy.py | 206 - .../models/kubeflow_org_v1_paddle_job.py | 228 - .../models/kubeflow_org_v1_paddle_job_list.py | 205 - .../models/kubeflow_org_v1_paddle_job_spec.py | 176 - .../models/kubeflow_org_v1_py_torch_job.py | 228 - .../kubeflow_org_v1_py_torch_job_list.py | 205 - .../kubeflow_org_v1_py_torch_job_spec.py | 204 - .../models/kubeflow_org_v1_rdzv_conf.py | 146 - .../models/kubeflow_org_v1_replica_spec.py | 176 - .../models/kubeflow_org_v1_replica_status.py | 232 - .../models/kubeflow_org_v1_run_policy.py | 288 - .../kubeflow_org_v1_scheduling_policy.py | 224 - .../training/models/kubeflow_org_v1_tf_job.py | 228 - .../models/kubeflow_org_v1_tf_job_list.py | 205 - 
.../models/kubeflow_org_v1_tf_job_spec.py | 206 - .../models/kubeflow_org_v1_xg_boost_job.py | 228 - .../kubeflow_org_v1_xg_boost_job_list.py | 203 - .../kubeflow_org_v1_xg_boost_job_spec.py | 148 - sdk/python/kubeflow/training/rest.py | 291 - .../kubeflow/training/utils/__init__.py | 13 - sdk/python/kubeflow/training/utils/utils.py | 430 - sdk/python/setup.py | 69 - sdk/python/test/__init__.py | 0 sdk/python/test/conftest.py | 10 - .../test_e2e_pytorch_fine_tune_llm.py | 96 - sdk/python/test/e2e/__init__.py | 0 sdk/python/test/e2e/constants.py | 26 - sdk/python/test/e2e/test_e2e_jaxjob.py | 160 - sdk/python/test/e2e/test_e2e_mpijob.py | 219 - sdk/python/test/e2e/test_e2e_paddlejob.py | 162 - sdk/python/test/e2e/test_e2e_pytorchjob.py | 348 - sdk/python/test/e2e/test_e2e_tfjob.py | 168 - sdk/python/test/e2e/test_e2e_xgboostjob.py | 194 - sdk/python/test/e2e/utils.py | 88 - test/integration/framework/framework.go | 4 +- .../webhook.v2/clustertrainingruntime_test.go | 2 +- test/integration/webhook.v2/suite_test.go | 2 +- .../webhook.v2/trainingruntime_test.go | 2 +- test/integration/webhook.v2/trainjob_test.go | 2 +- test_job/README.md | 29 - test_job/apis/test_job/v1/constants.go | 33 - test_job/apis/test_job/v1/defaults.go | 116 - test_job/apis/test_job/v1/doc.go | 21 - .../apis/test_job/v1/openapi_generated.go | 170 - test_job/apis/test_job/v1/register.go | 74 - test_job/apis/test_job/v1/types.go | 69 - .../apis/test_job/v1/zz_generated.deepcopy.go | 122 - .../apis/test_job/v1/zz_generated.defaults.go | 44 - .../client/clientset/versioned/clientset.go | 119 - test_job/client/clientset/versioned/doc.go | 18 - .../versioned/fake/clientset_generated.go | 83 - .../client/clientset/versioned/fake/doc.go | 18 - .../clientset/versioned/fake/register.go | 54 - .../client/clientset/versioned/scheme/doc.go | 18 - .../clientset/versioned/scheme/register.go | 54 - .../versioned/typed/test_job/v1/doc.go | 18 - .../versioned/typed/test_job/v1/fake/doc.go | 18 - 
.../test_job/v1/fake/fake_test_job_client.go | 38 - .../typed/test_job/v1/fake/fake_testjob.go | 140 - .../typed/test_job/v1/generated_expansion.go | 19 - .../typed/test_job/v1/test_job_client.go | 105 - .../versioned/typed/test_job/v1/testjob.go | 193 - .../informers/externalversions/factory.go | 178 - .../informers/externalversions/generic.go | 60 - .../internalinterfaces/factory_interfaces.go | 38 - .../externalversions/test_job/interface.go | 44 - .../externalversions/test_job/v1/interface.go | 43 - .../externalversions/test_job/v1/testjob.go | 88 - .../test_job/v1/expansion_generated.go | 25 - .../client/listers/test_job/v1/testjob.go | 97 - test_job/test_util/v1/const.go | 32 - test_job/test_util/v1/pod.go | 95 - test_job/test_util/v1/service.go | 62 - test_job/test_util/v1/test_job_util.go | 76 - test_job/test_util/v1/util.go | 79 - third_party/library/license.txt | 7039 -------------- .../additional_license_info.csv | 14 - third_party_licenses/dep.txt | 57 - third_party_licenses/dep_repo.manual.csv | 1 - third_party_licenses/license_info.csv | 57 - third_party_licenses/repo.txt | 57 - vendor.go | 13 - 549 files changed, 271 insertions(+), 119679 deletions(-) delete mode 100644 PROJECT delete mode 100644 build/images/kubectl-delivery/Dockerfile delete mode 100644 build/images/training-operator/Dockerfile delete mode 100644 cmd/training-operator.v1/main.go delete mode 100644 docs/api/autogen/config.yaml delete mode 100644 docs/api/autogen/templates/gv_details.tpl delete mode 100644 docs/api/autogen/templates/gv_list.tpl delete mode 100644 docs/api/autogen/templates/type.tpl delete mode 100644 docs/api/autogen/templates/type_members.tpl delete mode 100644 docs/api/kubeflow.org_v1_generated.asciidoc delete mode 100644 examples/jax/cpu-demo/Dockerfile delete mode 100644 examples/jax/cpu-demo/demo.yaml delete mode 100644 examples/jax/cpu-demo/train.py delete mode 100644 examples/mpi/tensorflow-mnist-elastic.yaml delete mode 100644 
examples/mpi/tensorflow-mnist.yaml delete mode 100644 examples/paddlepaddle/simple-cpu.yaml delete mode 100644 examples/paddlepaddle/simple-gpu.yaml delete mode 100644 examples/pytorch/README.md delete mode 100644 examples/pytorch/cpu-demo/Dockerfile delete mode 100644 examples/pytorch/cpu-demo/README.MD delete mode 100644 examples/pytorch/cpu-demo/demo.py delete mode 100644 examples/pytorch/cpu-demo/demo.yaml delete mode 100644 examples/pytorch/deepspeed-demo/Dockerfile delete mode 100644 examples/pytorch/deepspeed-demo/README.md delete mode 100644 examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml delete mode 100644 examples/pytorch/deepspeed-demo/requirements.txt delete mode 100644 examples/pytorch/deepspeed-demo/train_bert_ds.py delete mode 100644 examples/pytorch/elastic/echo/Dockerfile delete mode 100644 examples/pytorch/elastic/echo/echo.py delete mode 100644 examples/pytorch/elastic/echo/echo.yaml delete mode 100644 examples/pytorch/elastic/etcd.yaml delete mode 100644 examples/pytorch/elastic/imagenet/.dockerignore delete mode 100644 examples/pytorch/elastic/imagenet/Dockerfile delete mode 100644 examples/pytorch/elastic/imagenet/imagenet.py delete mode 100644 examples/pytorch/elastic/imagenet/imagenet.yaml delete mode 100644 examples/pytorch/fsdp/fine-tune-t5-with-fsdp.ipynb delete mode 100644 examples/pytorch/image-classification/Train-CNN-with-FashionMNIST.ipynb delete mode 100644 examples/pytorch/image-classification/create-pytorchjob.ipynb delete mode 100644 examples/pytorch/language-modeling/train_api_hf_dataset.ipynb delete mode 100644 examples/pytorch/language-modeling/train_api_s3_dataset.ipynb delete mode 100644 examples/pytorch/mnist/Dockerfile delete mode 100644 examples/pytorch/mnist/Dockerfile-mpi delete mode 100644 examples/pytorch/mnist/Makefile delete mode 100644 examples/pytorch/mnist/README.md delete mode 100644 examples/pytorch/mnist/mnist.py delete mode 100644 examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml delete mode 
100644 examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml delete mode 100644 examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml delete mode 100644 examples/pytorch/simple.yaml delete mode 100644 examples/pytorch/smoke-dist/Dockerfile delete mode 100644 examples/pytorch/smoke-dist/README.md delete mode 100644 examples/pytorch/smoke-dist/dist_sendrecv.py delete mode 100644 examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml delete mode 100644 examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb delete mode 100644 examples/tensorflow/dist-mnist/Dockerfile delete mode 100644 examples/tensorflow/dist-mnist/README.md delete mode 100755 examples/tensorflow/dist-mnist/dist_mnist.py delete mode 100644 examples/tensorflow/dist-mnist/tf_job_mnist.yaml delete mode 100644 examples/tensorflow/distribution_strategy/Dockerfile delete mode 100644 examples/tensorflow/distribution_strategy/README.md delete mode 100644 examples/tensorflow/distribution_strategy/multi_worker_strategy-with-keras.py delete mode 100644 examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml delete mode 100644 examples/tensorflow/distribution_strategy/pvc.yaml delete mode 100644 examples/tensorflow/image-classification/create-tfjob.ipynb delete mode 100644 examples/tensorflow/mnist_utils.py delete mode 100644 examples/tensorflow/mnist_with_summaries/Dockerfile delete mode 100644 examples/tensorflow/mnist_with_summaries/README.md delete mode 100644 examples/tensorflow/mnist_with_summaries/mnist_with_summaries.py delete mode 100644 examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml delete mode 100644 examples/tensorflow/mnist_with_summaries/tfevent-volume/tfevent-pv.yaml delete mode 100644 examples/tensorflow/mnist_with_summaries/tfevent-volume/tfevent-pvc.yaml delete mode 100644 examples/tensorflow/simple.yaml delete mode 100644 examples/tensorflow/tf_sample/Dockerfile delete mode 100644 examples/tensorflow/tf_sample/Makefile delete mode 100644 
examples/tensorflow/tf_sample/setup.py delete mode 100644 examples/tensorflow/tf_sample/tf_smoke.py delete mode 100644 examples/xgboost/lightgbm-dist/Dockerfile delete mode 100644 examples/xgboost/lightgbm-dist/README.md delete mode 100644 examples/xgboost/lightgbm-dist/main.py delete mode 100644 examples/xgboost/lightgbm-dist/requirements.txt delete mode 100644 examples/xgboost/lightgbm-dist/train.py delete mode 100644 examples/xgboost/lightgbm-dist/utils.py delete mode 100644 examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml delete mode 100644 examples/xgboost/smoke-dist/Dockerfile delete mode 100644 examples/xgboost/smoke-dist/README.md delete mode 100644 examples/xgboost/smoke-dist/requirements.txt delete mode 100644 examples/xgboost/smoke-dist/tracker.py delete mode 100644 examples/xgboost/smoke-dist/xgboost_smoke_test.py delete mode 100644 examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml delete mode 100644 examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml delete mode 100644 examples/xgboost/xgboost-dist/Dockerfile delete mode 100644 examples/xgboost/xgboost-dist/README.md delete mode 100644 examples/xgboost/xgboost-dist/local_test.py delete mode 100644 examples/xgboost/xgboost-dist/main.py delete mode 100644 examples/xgboost/xgboost-dist/predict.py delete mode 100644 examples/xgboost/xgboost-dist/requirements.txt delete mode 100644 examples/xgboost/xgboost-dist/tracker.py delete mode 100644 examples/xgboost/xgboost-dist/train.py delete mode 100644 examples/xgboost/xgboost-dist/utils.py delete mode 100644 examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml delete mode 100644 examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml delete mode 100644 examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml delete mode 100644 examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml delete mode 100644 examples/xgboost/xgboostjob.yaml rename scripts/run-notebook.sh => 
hack/e2e-run-notebook.sh (79%) rename scripts/gha/setup-training-operator.sh => hack/e2e-setup-cluster.sh (83%) delete mode 100755 hack/generate-apidoc.sh delete mode 100755 hack/python-sdk/gen-sdk.sh delete mode 100755 hack/python-sdk/post_gen.py delete mode 100644 hack/python-sdk/swagger.json delete mode 100644 hack/python-sdk/swagger_config.json delete mode 100755 hack/scripts/cleanup_clusters.sh delete mode 100755 hack/scripts/update-changelog.sh delete mode 100644 hack/swagger/go.mod delete mode 100644 hack/swagger/go.sum delete mode 100644 hack/swagger/main.go delete mode 100644 hack/violation_exception_v1.list delete mode 100644 manifests/base/crds/kubeflow.org_jaxjobs.yaml delete mode 100644 manifests/base/crds/kubeflow.org_mpijobs.yaml delete mode 100644 manifests/base/crds/kubeflow.org_paddlejobs.yaml delete mode 100644 manifests/base/crds/kubeflow.org_pytorchjobs.yaml delete mode 100644 manifests/base/crds/kubeflow.org_tfjobs.yaml delete mode 100644 manifests/base/crds/kubeflow.org_xgboostjobs.yaml delete mode 100644 manifests/base/crds/kustomization.yaml delete mode 100644 manifests/base/deployment.yaml delete mode 100644 manifests/base/kustomization.yaml delete mode 100644 manifests/base/rbac/cluster-role-binding.yaml delete mode 100644 manifests/base/rbac/role.yaml delete mode 100644 manifests/base/rbac/service-account.yaml delete mode 100644 manifests/base/service.yaml delete mode 100644 manifests/base/webhook/kustomization.yaml delete mode 100644 manifests/base/webhook/kustomizeconfig.yaml delete mode 100644 manifests/base/webhook/manifests.yaml delete mode 100644 manifests/base/webhook/patch.yaml delete mode 100644 manifests/overlays/kubeflow/kubeflow-training-roles.yaml delete mode 100644 manifests/overlays/kubeflow/kustomization.yaml delete mode 100644 manifests/overlays/standalone/kustomization.yaml delete mode 100644 manifests/overlays/standalone/namespace.yaml create mode 100644 manifests/v2/third-party/jobset/manifests.yaml delete mode 100644 
pkg/apis/kubeflow.org/v1/common_types.go delete mode 100644 pkg/apis/kubeflow.org/v1/defaulting_utils.go delete mode 100644 pkg/apis/kubeflow.org/v1/doc.go delete mode 100644 pkg/apis/kubeflow.org/v1/groupversion_info.go delete mode 100644 pkg/apis/kubeflow.org/v1/jax_defaults.go delete mode 100644 pkg/apis/kubeflow.org/v1/jax_types.go delete mode 100644 pkg/apis/kubeflow.org/v1/mpi_defaults.go delete mode 100644 pkg/apis/kubeflow.org/v1/mpi_defaults_test.go delete mode 100644 pkg/apis/kubeflow.org/v1/mpi_types.go delete mode 100644 pkg/apis/kubeflow.org/v1/mpi_validation.go delete mode 100644 pkg/apis/kubeflow.org/v1/mpi_validation_test.go delete mode 100644 pkg/apis/kubeflow.org/v1/paddlepaddle_defaults.go delete mode 100644 pkg/apis/kubeflow.org/v1/paddlepaddle_types.go delete mode 100644 pkg/apis/kubeflow.org/v1/pytorch_defaults.go delete mode 100644 pkg/apis/kubeflow.org/v1/pytorch_defaults_test.go delete mode 100644 pkg/apis/kubeflow.org/v1/pytorch_types.go delete mode 100644 pkg/apis/kubeflow.org/v1/tensorflow_defaults.go delete mode 100644 pkg/apis/kubeflow.org/v1/tensorflow_defaults_test.go delete mode 100644 pkg/apis/kubeflow.org/v1/tensorflow_types.go delete mode 100644 pkg/apis/kubeflow.org/v1/tensorflow_types_test.go delete mode 100644 pkg/apis/kubeflow.org/v1/validation_test_utils.go delete mode 100644 pkg/apis/kubeflow.org/v1/xgboost_defaults.go delete mode 100644 pkg/apis/kubeflow.org/v1/xgboost_defaults_test.go delete mode 100644 pkg/apis/kubeflow.org/v1/xgboost_types.go delete mode 100644 pkg/apis/kubeflow.org/v1/zz_generated.deepcopy.go delete mode 100644 pkg/apis/kubeflow.org/v1/zz_generated.defaults.go delete mode 100644 pkg/apis/kubeflow.org/v1/zz_generated.openapi.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/elasticpolicy.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/jaxjob.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/jaxjobspec.go delete mode 100644 
pkg/client/applyconfiguration/kubeflow.org/v1/jobcondition.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/jobstatus.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/mpijob.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/mpijobspec.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/paddleelasticpolicy.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/paddlejob.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/paddlejobspec.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/pytorchjob.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/pytorchjobspec.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/rdzvconf.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/replicaspec.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/replicastatus.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/runpolicy.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/schedulingpolicy.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/tfjob.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/tfjobspec.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/xgboostjob.go delete mode 100644 pkg/client/applyconfiguration/kubeflow.org/v1/xgboostjobspec.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/doc.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/doc.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_jaxjob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_kubeflow.org_client.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_mpijob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_paddlejob.go delete mode 100644 
pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_pytorchjob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_tfjob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_xgboostjob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/generated_expansion.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/jaxjob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/kubeflow.org_client.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/mpijob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/paddlejob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/pytorchjob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/tfjob.go delete mode 100644 pkg/client/clientset/versioned/typed/kubeflow.org/v1/xgboostjob.go delete mode 100644 pkg/client/informers/externalversions/kubeflow.org/v1/interface.go delete mode 100644 pkg/client/informers/externalversions/kubeflow.org/v1/jaxjob.go delete mode 100644 pkg/client/informers/externalversions/kubeflow.org/v1/mpijob.go delete mode 100644 pkg/client/informers/externalversions/kubeflow.org/v1/paddlejob.go delete mode 100644 pkg/client/informers/externalversions/kubeflow.org/v1/pytorchjob.go delete mode 100644 pkg/client/informers/externalversions/kubeflow.org/v1/tfjob.go delete mode 100644 pkg/client/informers/externalversions/kubeflow.org/v1/xgboostjob.go delete mode 100644 pkg/client/listers/kubeflow.org/v1/expansion_generated.go delete mode 100644 pkg/client/listers/kubeflow.org/v1/jaxjob.go delete mode 100644 pkg/client/listers/kubeflow.org/v1/mpijob.go delete mode 100644 pkg/client/listers/kubeflow.org/v1/paddlejob.go delete mode 100644 pkg/client/listers/kubeflow.org/v1/pytorchjob.go delete mode 100644 pkg/client/listers/kubeflow.org/v1/tfjob.go delete mode 100644 
pkg/client/listers/kubeflow.org/v1/xgboostjob.go delete mode 100644 pkg/common/interface.go delete mode 100644 pkg/common/metrics.go delete mode 100644 pkg/common/update.go delete mode 100644 pkg/common/util/fake_workqueue.go delete mode 100644 pkg/common/util/reconciler.go delete mode 100644 pkg/common/util/scheduler.go delete mode 100644 pkg/common/util/util.go delete mode 100644 pkg/common/util/util_test.go delete mode 100644 pkg/common/util/webhooks.go delete mode 100644 pkg/config/config.go delete mode 100644 pkg/controller.v1/common/job.go delete mode 100644 pkg/controller.v1/common/job_controller.go delete mode 100644 pkg/controller.v1/common/job_test.go delete mode 100644 pkg/controller.v1/common/pod.go delete mode 100644 pkg/controller.v1/common/pod_test.go delete mode 100644 pkg/controller.v1/common/scheduling.go delete mode 100644 pkg/controller.v1/common/service.go delete mode 100644 pkg/controller.v1/common/service_test.go delete mode 100644 pkg/controller.v1/common/status.go delete mode 100644 pkg/controller.v1/common/status_test.go delete mode 100644 pkg/controller.v1/common/util.go delete mode 100644 pkg/controller.v1/common/util_test.go delete mode 100644 pkg/controller.v1/control/controller_ref_manager.go delete mode 100644 pkg/controller.v1/control/controller_ref_manager_test.go delete mode 100644 pkg/controller.v1/control/pod_control.go delete mode 100644 pkg/controller.v1/control/pod_control_test.go delete mode 100644 pkg/controller.v1/control/podgroup_control.go delete mode 100644 pkg/controller.v1/control/service_control.go delete mode 100644 pkg/controller.v1/control/service_control_test.go delete mode 100644 pkg/controller.v1/control/utils.go delete mode 100644 pkg/controller.v1/expectation/expectation.go delete mode 100644 pkg/controller.v1/expectation/expectation_test.go delete mode 100644 pkg/controller.v1/expectation/util.go delete mode 100644 pkg/controller.v1/jax/envvar.go delete mode 100644 pkg/controller.v1/jax/envvar_test.go delete 
mode 100644 pkg/controller.v1/jax/jaxjob_controller.go delete mode 100644 pkg/controller.v1/jax/jaxjob_controller_suite_test.go delete mode 100644 pkg/controller.v1/jax/jaxjob_controller_test.go delete mode 100644 pkg/controller.v1/mpi/mpijob.go delete mode 100644 pkg/controller.v1/mpi/mpijob_controller.go delete mode 100644 pkg/controller.v1/mpi/mpijob_controller_test.go delete mode 100644 pkg/controller.v1/mpi/suite_test.go delete mode 100644 pkg/controller.v1/paddlepaddle/envvar.go delete mode 100644 pkg/controller.v1/paddlepaddle/paddlepaddle_controller.go delete mode 100644 pkg/controller.v1/paddlepaddle/paddlepaddle_controller_suite_test.go delete mode 100644 pkg/controller.v1/paddlepaddle/paddlepaddle_controller_test.go delete mode 100644 pkg/controller.v1/pytorch/elastic.go delete mode 100644 pkg/controller.v1/pytorch/elastic_test.go delete mode 100644 pkg/controller.v1/pytorch/envvar.go delete mode 100644 pkg/controller.v1/pytorch/hpa.go delete mode 100644 pkg/controller.v1/pytorch/initcontainer.go delete mode 100644 pkg/controller.v1/pytorch/initcontainer_test.go delete mode 100644 pkg/controller.v1/pytorch/master.go delete mode 100644 pkg/controller.v1/pytorch/pytorchjob_controller.go delete mode 100644 pkg/controller.v1/pytorch/pytorchjob_controller_suite_test.go delete mode 100644 pkg/controller.v1/pytorch/pytorchjob_controller_test.go delete mode 100644 pkg/controller.v1/register_controller.go delete mode 100644 pkg/controller.v1/register_controller_test.go delete mode 100644 pkg/controller.v1/tensorflow/job_test.go delete mode 100644 pkg/controller.v1/tensorflow/pod_test.go delete mode 100644 pkg/controller.v1/tensorflow/status_test.go delete mode 100644 pkg/controller.v1/tensorflow/suite_test.go delete mode 100644 pkg/controller.v1/tensorflow/tensorflow.go delete mode 100644 pkg/controller.v1/tensorflow/tensorflow_test.go delete mode 100644 pkg/controller.v1/tensorflow/testutil/pod.go delete mode 100644 
pkg/controller.v1/tensorflow/testutil/service.go delete mode 100644 pkg/controller.v1/tensorflow/testutil/tfjob.go delete mode 100644 pkg/controller.v1/tensorflow/tfjob_controller.go delete mode 100644 pkg/controller.v1/tensorflow/tfjob_controller_test.go delete mode 100644 pkg/controller.v1/tensorflow/util.go delete mode 100644 pkg/controller.v1/tensorflow/util_test.go delete mode 100644 pkg/controller.v1/xgboost/suite_test.go delete mode 100644 pkg/controller.v1/xgboost/xgboost.go delete mode 100644 pkg/controller.v1/xgboost/xgboostjob_controller.go delete mode 100644 pkg/controller.v1/xgboost/xgboostjob_controller_test.go delete mode 100644 pkg/core/job.go delete mode 100644 pkg/core/pod.go delete mode 100644 pkg/core/service.go delete mode 100644 pkg/core/status.go delete mode 100644 pkg/core/utils.go delete mode 100644 pkg/util/counter.go delete mode 100644 pkg/util/k8sutil/client.go delete mode 100644 pkg/util/k8sutil/k8sutil.go delete mode 100644 pkg/util/labels/labels.go delete mode 100644 pkg/util/labels/labels_test.go delete mode 100644 pkg/util/logger.go delete mode 100644 pkg/util/signals/signal.go delete mode 100644 pkg/util/signals/signal_posix.go delete mode 100644 pkg/util/signals/signal_windows.go delete mode 100644 pkg/util/status.go delete mode 100644 pkg/util/status_test.go delete mode 100644 pkg/util/testutil/constants.go delete mode 100644 pkg/util/train/train_util.go delete mode 100644 pkg/util/train/train_util_test.go rename pkg/{webhook.v2 => webhooks.v2}/clustertrainingruntime_webhook.go (99%) rename pkg/{webhook.v2 => webhooks.v2}/setup.go (98%) rename pkg/{webhook.v2 => webhooks.v2}/trainingruntime_webhook.go (99%) rename pkg/{webhook.v2 => webhooks.v2}/trainingruntime_webhook_test.go (99%) rename pkg/{webhook.v2 => webhooks.v2}/trainjob_webhook.go (99%) delete mode 100644 pkg/webhooks/jax/jaxjob_webhook.go delete mode 100644 pkg/webhooks/jax/jaxjob_webhook_test.go delete mode 100644 pkg/webhooks/paddlepaddle/paddlepaddle_webhook.go 
delete mode 100644 pkg/webhooks/paddlepaddle/paddlepaddle_webhook_test.go delete mode 100644 pkg/webhooks/pytorch/pytorchjob_webhook.go delete mode 100644 pkg/webhooks/pytorch/pytorchjob_webhook_test.go delete mode 100644 pkg/webhooks/tensorflow/tfjob_webhook.go delete mode 100644 pkg/webhooks/tensorflow/tfjob_webhook_test.go delete mode 100644 pkg/webhooks/webhooks.go delete mode 100644 pkg/webhooks/xgboost/xgboostjob_webhook.go delete mode 100644 pkg/webhooks/xgboost/xgboostjob_webhook_test.go delete mode 100644 prow_config.yaml delete mode 100755 scripts/copy-to-gopath.sh delete mode 100755 scripts/gha/build-image.sh delete mode 100755 scripts/gha/build-jax-mnist-image.sh delete mode 100755 scripts/gha/build-storage-initializer.sh delete mode 100755 scripts/gha/build-trainer.sh delete mode 100755 scripts/setup-tf-operator.sh delete mode 100755 scripts/setup-training-operator.sh delete mode 100644 sdk/python/.gitignore delete mode 100644 sdk/python/.openapi-generator-ignore delete mode 100644 sdk/python/.openapi-generator/VERSION delete mode 100644 sdk/python/Dockerfile.conformance delete mode 100644 sdk/python/OWNERS delete mode 100644 sdk/python/README.md delete mode 100644 sdk/python/conformance/run.sh delete mode 100644 sdk/python/docs/KubeflowOrgV1ElasticPolicy.md delete mode 100644 sdk/python/docs/KubeflowOrgV1JAXJob.md delete mode 100644 sdk/python/docs/KubeflowOrgV1JAXJobList.md delete mode 100644 sdk/python/docs/KubeflowOrgV1JAXJobSpec.md delete mode 100644 sdk/python/docs/KubeflowOrgV1JobCondition.md delete mode 100644 sdk/python/docs/KubeflowOrgV1JobStatus.md delete mode 100644 sdk/python/docs/KubeflowOrgV1MPIJob.md delete mode 100644 sdk/python/docs/KubeflowOrgV1MPIJobList.md delete mode 100644 sdk/python/docs/KubeflowOrgV1MPIJobSpec.md delete mode 100644 sdk/python/docs/KubeflowOrgV1PaddleElasticPolicy.md delete mode 100644 sdk/python/docs/KubeflowOrgV1PaddleJob.md delete mode 100644 sdk/python/docs/KubeflowOrgV1PaddleJobList.md delete mode 100644 
sdk/python/docs/KubeflowOrgV1PaddleJobSpec.md delete mode 100644 sdk/python/docs/KubeflowOrgV1PyTorchJob.md delete mode 100644 sdk/python/docs/KubeflowOrgV1PyTorchJobList.md delete mode 100644 sdk/python/docs/KubeflowOrgV1PyTorchJobSpec.md delete mode 100644 sdk/python/docs/KubeflowOrgV1RDZVConf.md delete mode 100644 sdk/python/docs/KubeflowOrgV1ReplicaSpec.md delete mode 100644 sdk/python/docs/KubeflowOrgV1ReplicaStatus.md delete mode 100644 sdk/python/docs/KubeflowOrgV1RunPolicy.md delete mode 100644 sdk/python/docs/KubeflowOrgV1SchedulingPolicy.md delete mode 100644 sdk/python/docs/KubeflowOrgV1TFJob.md delete mode 100644 sdk/python/docs/KubeflowOrgV1TFJobList.md delete mode 100644 sdk/python/docs/KubeflowOrgV1TFJobSpec.md delete mode 100644 sdk/python/docs/KubeflowOrgV1XGBoostJob.md delete mode 100644 sdk/python/docs/KubeflowOrgV1XGBoostJobList.md delete mode 100644 sdk/python/docs/KubeflowOrgV1XGBoostJobSpec.md delete mode 100644 sdk/python/docs/RuntimeTypeMeta.md delete mode 100644 sdk/python/docs/RuntimeUnknown.md delete mode 100644 sdk/python/docs/V1APIGroup.md delete mode 100644 sdk/python/docs/V1APIGroupList.md delete mode 100644 sdk/python/docs/V1APIResource.md delete mode 100644 sdk/python/docs/V1APIResourceList.md delete mode 100644 sdk/python/docs/V1APIVersions.md delete mode 100644 sdk/python/docs/V1ApplyOptions.md delete mode 100644 sdk/python/docs/V1Condition.md delete mode 100644 sdk/python/docs/V1CreateOptions.md delete mode 100644 sdk/python/docs/V1DeleteOptions.md delete mode 100644 sdk/python/docs/V1FieldSelectorRequirement.md delete mode 100644 sdk/python/docs/V1GetOptions.md delete mode 100644 sdk/python/docs/V1GroupKind.md delete mode 100644 sdk/python/docs/V1GroupResource.md delete mode 100644 sdk/python/docs/V1GroupVersion.md delete mode 100644 sdk/python/docs/V1GroupVersionForDiscovery.md delete mode 100644 sdk/python/docs/V1GroupVersionKind.md delete mode 100644 sdk/python/docs/V1GroupVersionResource.md delete mode 100644 
sdk/python/docs/V1InternalEvent.md delete mode 100644 sdk/python/docs/V1LabelSelector.md delete mode 100644 sdk/python/docs/V1LabelSelectorRequirement.md delete mode 100644 sdk/python/docs/V1List.md delete mode 100644 sdk/python/docs/V1ListMeta.md delete mode 100644 sdk/python/docs/V1ListOptions.md delete mode 100644 sdk/python/docs/V1ManagedFieldsEntry.md delete mode 100644 sdk/python/docs/V1ObjectMeta.md delete mode 100644 sdk/python/docs/V1OwnerReference.md delete mode 100644 sdk/python/docs/V1PartialObjectMetadata.md delete mode 100644 sdk/python/docs/V1PartialObjectMetadataList.md delete mode 100644 sdk/python/docs/V1PatchOptions.md delete mode 100644 sdk/python/docs/V1Preconditions.md delete mode 100644 sdk/python/docs/V1RootPaths.md delete mode 100644 sdk/python/docs/V1ServerAddressByClientCIDR.md delete mode 100644 sdk/python/docs/V1Status.md delete mode 100644 sdk/python/docs/V1StatusCause.md delete mode 100644 sdk/python/docs/V1StatusDetails.md delete mode 100644 sdk/python/docs/V1Table.md delete mode 100644 sdk/python/docs/V1TableColumnDefinition.md delete mode 100644 sdk/python/docs/V1TableOptions.md delete mode 100644 sdk/python/docs/V1TableRow.md delete mode 100644 sdk/python/docs/V1TableRowCondition.md delete mode 100644 sdk/python/docs/V1Timestamp.md delete mode 100644 sdk/python/docs/V1TypeMeta.md delete mode 100644 sdk/python/docs/V1UpdateOptions.md delete mode 100644 sdk/python/docs/V1WatchEvent.md delete mode 100644 sdk/python/docs/VersionInfo.md delete mode 100644 sdk/python/kubeflow/__init__.py delete mode 100644 sdk/python/kubeflow/storage_initializer/Dockerfile delete mode 100644 sdk/python/kubeflow/storage_initializer/__init__.py delete mode 100644 sdk/python/kubeflow/storage_initializer/abstract_dataset_provider.py delete mode 100644 sdk/python/kubeflow/storage_initializer/abstract_model_provider.py delete mode 100644 sdk/python/kubeflow/storage_initializer/constants.py delete mode 100644 
sdk/python/kubeflow/storage_initializer/hugging_face.py delete mode 100644 sdk/python/kubeflow/storage_initializer/requirements.txt delete mode 100644 sdk/python/kubeflow/storage_initializer/s3.py delete mode 100644 sdk/python/kubeflow/storage_initializer/storage.py delete mode 100644 sdk/python/kubeflow/trainer/Dockerfile delete mode 100644 sdk/python/kubeflow/trainer/Dockerfile.cpu delete mode 100644 sdk/python/kubeflow/trainer/hf_llm_training.py delete mode 100644 sdk/python/kubeflow/trainer/requirements.txt delete mode 100644 sdk/python/kubeflow/training/__init__.py delete mode 100644 sdk/python/kubeflow/training/api/__init__.py delete mode 100644 sdk/python/kubeflow/training/api/training_client.py delete mode 100644 sdk/python/kubeflow/training/api/training_client_test.py delete mode 100644 sdk/python/kubeflow/training/api_client.py delete mode 100644 sdk/python/kubeflow/training/configuration.py delete mode 100644 sdk/python/kubeflow/training/constants/__init__.py delete mode 100644 sdk/python/kubeflow/training/constants/constants.py delete mode 100644 sdk/python/kubeflow/training/exceptions.py delete mode 100644 sdk/python/kubeflow/training/models/__init__.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_elastic_policy.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job_list.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job_spec.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_job_condition.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_job_status.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job_list.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job_spec.py delete mode 100644 
sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_elastic_policy.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job_list.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job_spec.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job_list.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job_spec.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_rdzv_conf.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_replica_spec.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_replica_status.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_run_policy.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_scheduling_policy.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job_list.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job_spec.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job_list.py delete mode 100644 sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job_spec.py delete mode 100644 sdk/python/kubeflow/training/rest.py delete mode 100644 sdk/python/kubeflow/training/utils/__init__.py delete mode 100644 sdk/python/kubeflow/training/utils/utils.py delete mode 100644 sdk/python/setup.py delete mode 100644 sdk/python/test/__init__.py delete mode 100644 sdk/python/test/conftest.py delete mode 100644 sdk/python/test/e2e-fine-tune-llm/test_e2e_pytorch_fine_tune_llm.py delete mode 100644 
sdk/python/test/e2e/__init__.py delete mode 100644 sdk/python/test/e2e/constants.py delete mode 100644 sdk/python/test/e2e/test_e2e_jaxjob.py delete mode 100644 sdk/python/test/e2e/test_e2e_mpijob.py delete mode 100644 sdk/python/test/e2e/test_e2e_paddlejob.py delete mode 100644 sdk/python/test/e2e/test_e2e_pytorchjob.py delete mode 100644 sdk/python/test/e2e/test_e2e_tfjob.py delete mode 100644 sdk/python/test/e2e/test_e2e_xgboostjob.py delete mode 100644 sdk/python/test/e2e/utils.py delete mode 100644 test_job/README.md delete mode 100644 test_job/apis/test_job/v1/constants.go delete mode 100644 test_job/apis/test_job/v1/defaults.go delete mode 100644 test_job/apis/test_job/v1/doc.go delete mode 100644 test_job/apis/test_job/v1/openapi_generated.go delete mode 100644 test_job/apis/test_job/v1/register.go delete mode 100644 test_job/apis/test_job/v1/types.go delete mode 100644 test_job/apis/test_job/v1/zz_generated.deepcopy.go delete mode 100644 test_job/apis/test_job/v1/zz_generated.defaults.go delete mode 100644 test_job/client/clientset/versioned/clientset.go delete mode 100644 test_job/client/clientset/versioned/doc.go delete mode 100644 test_job/client/clientset/versioned/fake/clientset_generated.go delete mode 100644 test_job/client/clientset/versioned/fake/doc.go delete mode 100644 test_job/client/clientset/versioned/fake/register.go delete mode 100644 test_job/client/clientset/versioned/scheme/doc.go delete mode 100644 test_job/client/clientset/versioned/scheme/register.go delete mode 100644 test_job/client/clientset/versioned/typed/test_job/v1/doc.go delete mode 100644 test_job/client/clientset/versioned/typed/test_job/v1/fake/doc.go delete mode 100644 test_job/client/clientset/versioned/typed/test_job/v1/fake/fake_test_job_client.go delete mode 100644 test_job/client/clientset/versioned/typed/test_job/v1/fake/fake_testjob.go delete mode 100644 test_job/client/clientset/versioned/typed/test_job/v1/generated_expansion.go delete mode 100644 
test_job/client/clientset/versioned/typed/test_job/v1/test_job_client.go delete mode 100644 test_job/client/clientset/versioned/typed/test_job/v1/testjob.go delete mode 100644 test_job/client/informers/externalversions/factory.go delete mode 100644 test_job/client/informers/externalversions/generic.go delete mode 100644 test_job/client/informers/externalversions/internalinterfaces/factory_interfaces.go delete mode 100644 test_job/client/informers/externalversions/test_job/interface.go delete mode 100644 test_job/client/informers/externalversions/test_job/v1/interface.go delete mode 100644 test_job/client/informers/externalversions/test_job/v1/testjob.go delete mode 100644 test_job/client/listers/test_job/v1/expansion_generated.go delete mode 100644 test_job/client/listers/test_job/v1/testjob.go delete mode 100644 test_job/test_util/v1/const.go delete mode 100644 test_job/test_util/v1/pod.go delete mode 100644 test_job/test_util/v1/service.go delete mode 100644 test_job/test_util/v1/test_job_util.go delete mode 100644 test_job/test_util/v1/util.go delete mode 100644 third_party/library/license.txt delete mode 100644 third_party_licenses/additional_license_info.csv delete mode 100644 third_party_licenses/dep.txt delete mode 100644 third_party_licenses/dep_repo.manual.csv delete mode 100644 third_party_licenses/license_info.csv delete mode 100644 third_party_licenses/repo.txt delete mode 100644 vendor.go diff --git a/.gitignore b/.gitignore index f31a988e29..f50b5f2538 100644 --- a/.gitignore +++ b/.gitignore @@ -1,52 +1,24 @@ # pkg and bin directories currently contain build artifacts # only so we exclude them. bin/ -/tf-operator -vendor/ -testbin/* manifests/external-crds/ -cover.out # IDEs .vscode/ __debug_bin - -# Python cache files -__pycache__/ - -# Emacs temporary files -*~ - -# VIM temporary files. -.swp - -# Other temporary files -.DS_Store - -# Files created by Gogland IDE .idea/ +.DS_Store +.swp +*~ -# Exclude wheel files for now. 
-# The only wheel file is the TF wheel one which is quite large. -# We don't want to check that into source control because it could be -# quite large. -*.whl - -# Bazel files -**/bazel-* -# Examples egg -examples/tf_sample/tf_sample.egg-info/ -examples/.ipynb_checkpoints/ - +# Jupyter Notebooks. **/.ipynb_checkpoints -# openapi-codegen tools and auto generated files but useless -hack/python-sdk/openapi-generator-cli.jar +# Python cache files +__pycache__/ + +# OpenAPI Generator CLI JAR file hack/python-sdk-v2/openapi-generator-cli.jar # Coverage cover.out - -/training-operator - -data/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9eaa555f74..101c63ece8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,19 +23,7 @@ repos: - id: flake8 exclude: | (?x)^( - pkg/apis/kubeflow.org/v1/openapi_generated.go| - pkg/apis/kubeflow.org/v1/zz_.*| pkg/client/.*| - test_job/apis/test_job/v1/.*generated.*.go| - test_job/client/.*| - docs/api/.*| - hack/python-sdk/post_gen.py| - sdk/python/kubeflow/training/[^/]*.py| - sdk/python/kubeflow/training/models/.*| - sdk/python/test/.*| - sdk/python/docs/.*| - sdk/python/.openapi-generator/VERSION| - sdk/python/kubeflow/__init__.py| sdk_v2/kubeflow/training/[^/]*.py| sdk_v2/kubeflow/training/models/.*| sdk_v2/docs/.* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a7bd8ef76e..1f52a469e0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,5 +1,7 @@ # Developer Guide +# TODO (andreyvelich): This doc needs to be updated for Kubeflow Trainer V2 + Kubeflow Training Operator is currently at v1. ## Requirements @@ -47,38 +49,49 @@ Running the operator locally (as opposed to deploying it on a K8s cluster) is co First, you need to run a Kubernetes cluster locally. We recommend [Kind](https://kind.sigs.k8s.io). You can create a `kind` cluster by running + ```sh kind create cluster ``` + This will load your kubernetes config file with the new cluster. 
After creating the cluster, you can check the nodes with the code below which should show you the kind-control-plane. + ```sh kubectl get nodes ``` + The output should look something like below: + ``` $ kubectl get nodes NAME STATUS ROLES AGE VERSION kind-control-plane Ready control-plane 32s v1.27.3 ``` + Note, that for the example job below, the PyTorchJob uses the `kubeflow` namespace. From here we can apply the manifests to the cluster. + ```sh kubectl apply --server-side -k "github.com/kubeflow/training-operator/manifests/overlays/standalone" ``` Then we can patch it with the latest operator image. + ```sh kubectl patch -n kubeflow deployments training-operator --type json -p '[{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "kubeflow/training-operator:latest"}]' ``` + Then we can run the job with the following command. ```sh kubectl apply -f https://raw.githubusercontent.com/kubeflow/training-operator/master/examples/pytorch/simple.yaml ``` + And we can see the output of the job from the logs, which may take some time to produce but should look something like below. + ``` $ kubectl logs -n kubeflow -l training.kubeflow.org/job-name=pytorch-simple --follow Defaulted container "pytorch" out of: pytorch, init-pytorch (init) @@ -110,12 +123,15 @@ Now that you confirmed you can spin up an operator locally, you can try to test You do this by building a new operator image and loading it into your kind cluster. ### Build Operator Image + ```sh make docker-build IMG=my-username/training-operator:my-pr-01 ``` + You can swap `my-username/training-operator:my-pr-01` with whatever you would like. 
## Load docker image + ```sh kind load docker-image my-username/training-operator:my-pr-01 ``` @@ -126,21 +142,28 @@ kind load docker-image my-username/training-operator:my-pr-01 cd ./manifests/overlays/standalone kustomize edit set image my-username/training-operator=my-username/training-operator:my-pr-01 ``` + Update the `newTag` key in `./manifests/overlays/standalone/kustomization.yaml` with the new image. Deploy the operator with: + ```sh kubectl apply -k ./manifests/overlays/standalone ``` + And now we can submit jobs to the operator. + ```sh kubectl patch -n kubeflow deployments training-operator --type json -p '[{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "my-username/training-operator:my-pr-01"}]' kubectl apply -f https://raw.githubusercontent.com/kubeflow/training-operator/master/examples/pytorch/simple.yaml ``` + You should be able to see a pod for your training operator running in your namespace using + ``` kubectl logs -n kubeflow -l training.kubeflow.org/job-name=pytorch-simple ``` + ## Go version On ubuntu the default go package appears to be gccgo-go which has problems see [issue](https://github.com/golang/go/issues/15429) golang-go package is also really old so install from golang tarballs instead. diff --git a/Makefile b/Makefile index eeed3843ae..d031a45fb4 100644 --- a/Makefile +++ b/Makefile @@ -36,26 +36,17 @@ help: ## Display this help. ##@ Development -manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. - $(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=training-operator webhook paths="./pkg/apis/kubeflow.org/v1/..." \ - output:crd:artifacts:config=manifests/base/crds \ - output:rbac:artifacts:config=manifests/base/rbac \ - output:webhook:artifacts:config=manifests/base/webhook +manifests: controller-gen ## Generate manifests. 
$(CONTROLLER_GEN) "crd:generateEmbeddedObjectMeta=true" rbac:roleName=training-operator-v2 webhook \ - paths="./pkg/apis/kubeflow.org/v2alpha1/...;./pkg/controller.v2/...;./pkg/runtime.v2/...;./pkg/webhook.v2/...;./pkg/cert/..." \ + paths="./pkg/apis/kubeflow.org/v2alpha1/...;./pkg/controller.v2/...;./pkg/runtime.v2/...;./pkg/webhooks.v2/...;./pkg/cert/..." \ output:crd:artifacts:config=manifests/v2/base/crds \ output:rbac:artifacts:config=manifests/v2/base/rbac \ output:webhook:artifacts:config=manifests/v2/base/webhook -generate: go-mod-download manifests ## Generate apidoc, sdk and code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. +generate: go-mod-download manifests ## Generate APIs and SDK. $(CONTROLLER_GEN) object:headerFile="hack/boilerplate/boilerplate.go.txt" paths="./pkg/apis/..." hack/update-codegen.sh - hack/python-sdk/gen-sdk.sh hack/python-sdk-v2/gen-sdk.sh - $(MAKE) apidoc - -apidoc: - hack/generate-apidoc.sh fmt: ## Run go fmt against code. go fmt ./... @@ -86,7 +77,7 @@ test-integrationv2: envtest jobset-operator-crd scheduler-plugins-crd .PHONY: testv2 testv2: - go test ./pkg/apis/kubeflow.org/v2alpha1/... ./pkg/controller.v2/... ./pkg/runtime.v2/... ./pkg/webhook.v2/... ./pkg/util.v2/... -coverprofile cover.out + go test ./pkg/apis/kubeflow.org/v2alpha1/... ./pkg/controller.v2/... ./pkg/runtime.v2/... ./pkg/webhooks.v2/... ./pkg/util.v2/... 
-coverprofile cover.out envtest: ifndef HAS_SETUP_ENVTEST diff --git a/PROJECT b/PROJECT deleted file mode 100644 index 4aea9cdea0..0000000000 --- a/PROJECT +++ /dev/null @@ -1,38 +0,0 @@ -layout: -- go.kubebuilder.io/v3 -projectName: training-operator -repo: github.com/kubeflow/training-operator -resources: -- api: - crdVersion: v1 - namespaced: true - controller: true - group: kubeflow.org - kind: XGBoostJob - path: github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1 - version: v1 -- api: - crdVersion: v1 - namespaced: true - controller: true - group: kubeflow.org - kind: PyTorchJob - path: github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1 - version: v1 -- api: - crdVersion: v1 - namespaced: true - controller: true - group: kubeflow.org - kind: TFJob - path: github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1 - version: v1 -- api: - crdVersion: v1 - namespaced: true - controller: true - group: kubeflow.org - kind: JAXJob - path: github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1 - version: v1 -version: "3" diff --git a/ROADMAP.md b/ROADMAP.md index e92ec903df..ad52d35fcf 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,14 +1,23 @@ # Roadmap -## 2022 +## 2023/2024 + +- Training Operator V2 +- Enhance JobSet APIs for distributed training and fine-tuning +- Kubeflow Training SDK improvements +- Support for distributed JAX +- Support for LLM Training runtimes +- Python APIs for LLMs fine-tuning +- Consolidate MPI Operator V2 into Training Operator -* Release training-operator v1.4 to be included in Kubeflow v1.5 release. -* Migrate v2 MPI operator to unified operator. -* Migrate PaddlePaddle operator to unified operator. -* Support elastic training for additional frameworks besides PyTorch. -* Support different gang scheduling definitions. -* Improve test coverage. +## 2022 +- Release training-operator v1.4 to be included in Kubeflow v1.5 release. +- Migrate v2 MPI operator to unified operator. 
+- Migrate PaddlePaddle operator to unified operator. +- Support elastic training for additional frameworks besides PyTorch. +- Support different gang scheduling definitions. +- Improve test coverage. ## 2020 and 2021 @@ -16,30 +25,30 @@ We will continue developing capabilities for better reliability, scaling, and maintenance of production distributed training experiences provided by operators. -* Enhance maintainability of operator common module. Related issue: [#54](https://github.com/kubeflow/common/issues/54). -* Migrate operators to use [kubeflow/common](https://github.com/kubeflow/common) APIs. Related issue: [#64](https://github.com/kubeflow/common/issues/64). -* Graduate MPI Operator, MXNet Operator and XGBoost Operator to v1. Related issue: [#65](https://github.com/kubeflow/common/issues/65). +- Enhance maintainability of operator common module. Related issue: [#54](https://github.com/kubeflow/common/issues/54). +- Migrate operators to use [kubeflow/common](https://github.com/kubeflow/common) APIs. Related issue: [#64](https://github.com/kubeflow/common/issues/64). +- Graduate MPI Operator, MXNet Operator and XGBoost Operator to v1. Related issue: [#65](https://github.com/kubeflow/common/issues/65). ### Features To take advantages of other capabilities of job scheduler components, operators will expose more APIs for advanced scheduling. More features will be added to simplify usage like dynamic volume supports and git ops experiences. In order to make it easily used in the Kubeflow ecosystem, we can add more launcher KFP components for adoption. -* Support dynamic volume provisioning for distributed training jobs. Related issue: [#19](https://github.com/kubeflow/common/issues/19). -* MLOps - Allow user to submit jobs using Git repo without building container images. Related issue: [#66](https://github.com/kubeflow/common/issues/66). -* Add Job priority and Queue in SchedulingPolicy for advanced scheduling in common operator. 
Related issue: [#46](https://github.com/kubeflow/common/issues/46). -* Add pipeline launcher components for different training jobs. Related issue: [pipeline#3445](https://github.com/kubeflow/pipelines/issues/3445). +- Support dynamic volume provisioning for distributed training jobs. Related issue: [#19](https://github.com/kubeflow/common/issues/19). +- MLOps - Allow user to submit jobs using Git repo without building container images. Related issue: [#66](https://github.com/kubeflow/common/issues/66). +- Add Job priority and Queue in SchedulingPolicy for advanced scheduling in common operator. Related issue: [#46](https://github.com/kubeflow/common/issues/46). +- Add pipeline launcher components for different training jobs. Related issue: [pipeline#3445](https://github.com/kubeflow/pipelines/issues/3445). ### Monitoring -* Provides a standardized logging interface. Related issue: [#60](https://github.com/kubeflow/common/issues/60). -* Expose generic prometheus metrics in common operators. Related issue: [#22](https://github.com/kubeflow/common/issues/22). -* Centralized Job Dashboard for training jobs (Add metadata graph, model artifacts later). Related issue: [#67](https://github.com/kubeflow/common/issues/67). +- Provides a standardized logging interface. Related issue: [#60](https://github.com/kubeflow/common/issues/60). +- Expose generic prometheus metrics in common operators. Related issue: [#22](https://github.com/kubeflow/common/issues/22). +- Centralized Job Dashboard for training jobs (Add metadata graph, model artifacts later). Related issue: [#67](https://github.com/kubeflow/common/issues/67). ### Performance Continue to optimize reconciler performance and reduce latency to take actions on CR events. -* Performance optimization for 500 concurrent jobs and large scale completed jobs. 
Related issues: [#68](https://github.com/kubeflow/common/issues/68), [tf-operator#965](https://github.com/kubeflow/tf-operator/issues/965), and [tf-operator#1079](https://github.com/kubeflow/tf-operator/issues/1079). +- Performance optimization for 500 concurrent jobs and large scale completed jobs. Related issues: [#68](https://github.com/kubeflow/common/issues/68), [tf-operator#965](https://github.com/kubeflow/tf-operator/issues/965), and [tf-operator#1079](https://github.com/kubeflow/tf-operator/issues/1079). ### Quarterly Goals diff --git a/build/images/kubectl-delivery/Dockerfile b/build/images/kubectl-delivery/Dockerfile deleted file mode 100644 index 3ff4b7d057..0000000000 --- a/build/images/kubectl-delivery/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM alpine:3.17 AS build - -# Install kubectl. -ENV K8S_VERSION v1.30.7 - -RUN apk add --no-cache wget -RUN wget -q https://dl.k8s.io/release/${K8S_VERSION}/bin/linux/amd64/kubectl -RUN chmod +x ./kubectl -RUN mv ./kubectl /bin/kubectl - -FROM alpine:3.17 -COPY --from=build /bin/kubectl /bin/kubectl -RUN apk add --no-cache bash - -ENTRYPOINT ["/bin/sh", "-c"] -CMD ["cp /bin/kubectl /opt/kube/kubectl"] diff --git a/build/images/training-operator/Dockerfile b/build/images/training-operator/Dockerfile deleted file mode 100644 index e3b00a67d3..0000000000 --- a/build/images/training-operator/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -# Build the manager binary -FROM golang:1.23 as builder - -WORKDIR /workspace -# Copy the Go Modules manifests -COPY go.mod go.mod -COPY go.sum go.sum -# cache deps before building and copying source so that we don't need to re-download as much -# and so that source changes don't invalidate our downloaded layer -RUN go mod download - -# Copy the go source -COPY . . 
- -# Build -RUN CGO_ENABLED=0 GOOS=linux GO111MODULE=on go build -a -o manager cmd/training-operator.v1/main.go - -# Use distroless as minimal base image to package the manager binary -# Refer to https://github.com/GoogleContainerTools/distroless for more details -FROM gcr.io/distroless/static:latest -WORKDIR / -COPY --from=builder /workspace/manager . -ENTRYPOINT ["/manager"] diff --git a/cmd/training-operator.v1/main.go b/cmd/training-operator.v1/main.go deleted file mode 100644 index 41a28ff590..0000000000 --- a/cmd/training-operator.v1/main.go +++ /dev/null @@ -1,263 +0,0 @@ -/* -Copyright 2021. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package main - -import ( - "errors" - "flag" - "net/http" - "os" - "strings" - - "go.uber.org/zap/zapcore" - "k8s.io/apimachinery/pkg/api/meta" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" - _ "k8s.io/client-go/plugin/pkg/client/auth" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/cache" - "sigs.k8s.io/controller-runtime/pkg/healthz" - "sigs.k8s.io/controller-runtime/pkg/log/zap" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/controller-runtime/pkg/webhook" - schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" - volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/cert" - "github.com/kubeflow/training-operator/pkg/config" - controllerv1 "github.com/kubeflow/training-operator/pkg/controller.v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - "github.com/kubeflow/training-operator/pkg/webhooks" - //+kubebuilder:scaffold:imports -) - -const ( - // EnvKubeflowNamespace is an environment variable for namespace when deployed on kubernetes - EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE" - - webhookConfigurationName = "validator.training-operator.kubeflow.org" -) - -var ( - scheme = runtime.NewScheme() - setupLog = ctrl.Log.WithName("setup") -) - -func init() { - utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(kubeflowv1.AddToScheme(scheme)) - utilruntime.Must(v1beta1.AddToScheme(scheme)) - utilruntime.Must(schedulerpluginsv1alpha1.AddToScheme(scheme)) - //+kubebuilder:scaffold:scheme -} - -func main() { - var metricsAddr string - var enableLeaderElection bool - var leaderElectionID string - var probeAddr 
string - var enabledSchemes controllerv1.EnabledSchemes - var gangSchedulerName string - var namespace string - var controllerThreads int - var webhookServerPort int - var webhookServiceName string - var webhookSecretName string - - flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") - flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") - flag.BoolVar(&enableLeaderElection, "leader-elect", false, - "Enable leader election for controller manager. "+ - "Enabling this will ensure there is only one active controller manager.") - flag.StringVar(&leaderElectionID, "leader-election-id", "1ca428e5.training-operator.kubeflow.org", "The ID for leader election.") - flag.Var(&enabledSchemes, "enable-scheme", "Enable scheme(s) as --enable-scheme=tfjob --enable-scheme=pytorchjob, case insensitive."+ - " Now supporting TFJob, PyTorchJob, XGBoostJob, PaddleJob, JAXJob. By default, all supported schemes will be enabled.") - flag.StringVar(&gangSchedulerName, "gang-scheduler-name", "", "Now Supporting volcano and scheduler-plugins."+ - " Note: If you set another scheduler name, the training-operator assumes it's the scheduler-plugins.") - flag.StringVar(&namespace, "namespace", os.Getenv(EnvKubeflowNamespace), "The namespace to monitor kubeflow jobs. 
If unset, it monitors all namespaces cluster-wide."+ - "If set, it only monitors kubeflow jobs in the given namespace.") - flag.IntVar(&controllerThreads, "controller-threads", 1, "Number of worker threads used by the controller.") - - // PyTorch related flags - flag.StringVar(&config.Config.PyTorchInitContainerImage, "pytorch-init-container-image", - config.PyTorchInitContainerImageDefault, "The image for pytorch init container") - flag.StringVar(&config.Config.PyTorchInitContainerTemplateFile, "pytorch-init-container-template-file", - config.PyTorchInitContainerTemplateFileDefault, "The template file for pytorch init container") - flag.IntVar(&config.Config.PyTorchInitContainerMaxTries, "pytorch-init-container-max-tries", - config.PyTorchInitContainerMaxTriesDefault, "The number of tries for the pytorch init container") - - // MPI related flags - flag.StringVar(&config.Config.MPIKubectlDeliveryImage, "mpi-kubectl-delivery-image", - config.MPIKubectlDeliveryImageDefault, "The image for mpi launcher init container") - - // Cert generation flags - flag.IntVar(&webhookServerPort, "webhook-server-port", 9443, "Endpoint port for the webhook server.") - flag.StringVar(&webhookServiceName, "webhook-service-name", "training-operator", "Name of the Service used as part of the DNSName") - flag.StringVar(&webhookSecretName, "webhook-secret-name", "training-operator-webhook-cert", "Name of the Secret to store CA and server certs") - - opts := zap.Options{ - Development: true, - StacktraceLevel: zapcore.DPanicLevel, - } - opts.BindFlags(flag.CommandLine) - flag.Parse() - - ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) - - var cacheOpts cache.Options - if namespace != "" { - cacheOpts = cache.Options{ - DefaultNamespaces: map[string]cache.Config{ - namespace: {}, - }, - } - } - - mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ - Scheme: scheme, - Metrics: metricsserver.Options{ - BindAddress: metricsAddr, - }, - WebhookServer: 
webhook.NewServer(webhook.Options{ - Port: webhookServerPort, - }), - HealthProbeBindAddress: probeAddr, - LeaderElection: enableLeaderElection, - LeaderElectionID: leaderElectionID, - Cache: cacheOpts, - }) - if err != nil { - setupLog.Error(err, "unable to start manager") - os.Exit(1) - } - - certsReady := make(chan struct{}) - defer close(certsReady) - certGenerationConfig := cert.Config{ - WebhookSecretName: webhookSecretName, - WebhookServiceName: webhookServiceName, - WebhookConfigurationName: webhookConfigurationName, - } - if err = cert.ManageCerts(mgr, certGenerationConfig, certsReady); err != nil { - setupLog.Error(err, "Unable to set up cert rotation") - os.Exit(1) - } - - setupProbeEndpoints(mgr, certsReady) - // Set up controllers using goroutines to start the manager quickly. - go setupControllers(mgr, enabledSchemes, gangSchedulerName, controllerThreads, certsReady) - - //+kubebuilder:scaffold:builder - - setupLog.Info("starting manager") - if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { - setupLog.Error(err, "problem running manager") - os.Exit(1) - } -} - -func setupControllers(mgr ctrl.Manager, enabledSchemes controllerv1.EnabledSchemes, gangSchedulerName string, controllerThreads int, certsReady <-chan struct{}) { - setupLog.Info("Waiting for certificate generation to complete") - <-certsReady - setupLog.Info("Certs ready") - - setupLog.Info("registering controllers...") - // Prepare GangSchedulingSetupFunc - gangSchedulingSetupFunc := common.GenNonGangSchedulerSetupFunc() - if strings.EqualFold(gangSchedulerName, string(common.GangSchedulerVolcano)) { - cfg := mgr.GetConfig() - volcanoClientSet := volcanoclient.NewForConfigOrDie(cfg) - gangSchedulingSetupFunc = common.GenVolcanoSetupFunc(volcanoClientSet) - gvk := v1beta1.SchemeGroupVersion.WithKind("PodGroup") - validateCRD(mgr, gvk) - } else if gangSchedulerName != "" { - gangSchedulingSetupFunc = common.GenSchedulerPluginsSetupFunc(mgr.GetClient(), gangSchedulerName) - gvk := 
schedulerpluginsv1alpha1.SchemeGroupVersion.WithKind("PodGroup") - validateCRD(mgr, gvk) - } - - // TODO: We need a general manager. all rest reconciler addsToManager - // Based on the user configuration, we start different controllers - if enabledSchemes.Empty() { - enabledSchemes.FillAll() - } - errMsg := "failed to set up controllers" - for _, s := range enabledSchemes { - setupReconcilerFunc, supportedReconciler := controllerv1.SupportedSchemeReconciler[s] - if !supportedReconciler { - setupLog.Error(errors.New(errMsg), "scheme is not supported", "scheme", s) - os.Exit(1) - } - if err := setupReconcilerFunc(mgr, gangSchedulingSetupFunc, controllerThreads); err != nil { - setupLog.Error(errors.New(errMsg), "unable to create controller", "scheme", s) - os.Exit(1) - } - setupWebhookFunc, supportedWebhook := webhooks.SupportedSchemeWebhook[s] - if !supportedWebhook { - setupLog.Error(errors.New(errMsg), "scheme is not supported", "scheme", s) - os.Exit(1) - } - if err := setupWebhookFunc(mgr); err != nil { - setupLog.Error(errors.New(errMsg), "unable to start webhook server", "scheme", s) - os.Exit(1) - } - } -} - -func setupProbeEndpoints(mgr ctrl.Manager, certsReady <-chan struct{}) { - defer setupLog.Info("Probe endpoints are configured on healthz and readyz") - - if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { - setupLog.Error(err, "unable to set up health check") - os.Exit(1) - } - - // Wait for the webhook server to be listening before advertising the - // training-operator replica as ready. This allows users to wait with sending the first - // requests, requiring webhooks, until the training-operator deployment is available, so - // that the early requests are not rejected during the traininig-operator's startup. - // We wrap the call to GetWebhookServer in a closure to delay calling - // the function, otherwise a not fully-initialized webhook server (without - // ready certs) fails the start of the manager. 
- if err := mgr.AddReadyzCheck("readyz", func(req *http.Request) error { - select { - case <-certsReady: - return mgr.GetWebhookServer().StartedChecker()(req) - default: - return errors.New("certificates are not ready") - } - }); err != nil { - setupLog.Error(err, "unable to set up ready check") - os.Exit(1) - } -} - -func validateCRD(mgr ctrl.Manager, gvk schema.GroupVersionKind) { - _, err := mgr.GetRESTMapper().RESTMapping(gvk.GroupKind(), gvk.Version) - if err != nil { - if meta.IsNoMatchError(err) { - setupLog.Error(err, "crd might be missing, please install crd", "apiVersion", gvk.GroupVersion().String(), "kind", gvk.Kind) - os.Exit(1) - } - setupLog.Error(err, "unable to get crd", "apiVersion", gvk.GroupVersion().String(), "kind", gvk.Kind) - os.Exit(1) - } -} diff --git a/cmd/training-operator.v2alpha1/main.go b/cmd/training-operator.v2alpha1/main.go index 7933d04314..37b00571bb 100644 --- a/cmd/training-operator.v2alpha1/main.go +++ b/cmd/training-operator.v2alpha1/main.go @@ -42,7 +42,7 @@ import ( controllerv2 "github.com/kubeflow/training-operator/pkg/controller.v2" runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" runtimecore "github.com/kubeflow/training-operator/pkg/runtime.v2/core" - webhookv2 "github.com/kubeflow/training-operator/pkg/webhook.v2" + webhooksv2 "github.com/kubeflow/training-operator/pkg/webhooks.v2" ) const ( @@ -164,7 +164,7 @@ func setupControllers(mgr ctrl.Manager, runtimes map[string]runtime.Runtime, cer setupLog.Error(err, "Could not create controller", "controller", failedCtrlName) os.Exit(1) } - if failedWebhook, err := webhookv2.Setup(mgr, runtimes); err != nil { + if failedWebhook, err := webhooksv2.Setup(mgr, runtimes); err != nil { setupLog.Error(err, "Could not create webhook", "webhook", failedWebhook) os.Exit(1) } diff --git a/docs/api/autogen/config.yaml b/docs/api/autogen/config.yaml deleted file mode 100644 index d518ede595..0000000000 --- a/docs/api/autogen/config.yaml +++ /dev/null @@ -1,2 +0,0 @@ 
-render: - kubernetesVersion: "1.22" diff --git a/docs/api/autogen/templates/gv_details.tpl b/docs/api/autogen/templates/gv_details.tpl deleted file mode 100644 index 12f40524c0..0000000000 --- a/docs/api/autogen/templates/gv_details.tpl +++ /dev/null @@ -1,20 +0,0 @@ -{{- define "gvDetails" -}} -{{- $gv := . -}} -[id="{{ asciidocGroupVersionID $gv | asciidocRenderAnchorID }}"] -== {{ $gv.GroupVersionString }} - -{{ $gv.Doc }} - -{{- if $gv.Kinds }} -.Resource Types -{{- range $gv.SortedKinds }} -- {{ $gv.TypeForKind . | asciidocRenderTypeLink }} -{{- end }} -{{ end }} - -=== Definitions -{{ range $gv.SortedTypes }} -{{ template "type" . }} -{{ end }} - -{{- end -}} \ No newline at end of file diff --git a/docs/api/autogen/templates/gv_list.tpl b/docs/api/autogen/templates/gv_list.tpl deleted file mode 100644 index b83439604c..0000000000 --- a/docs/api/autogen/templates/gv_list.tpl +++ /dev/null @@ -1,19 +0,0 @@ -{{- define "gvList" -}} -{{- $groupVersions := . -}} - -// Generated documentation. Please do not edit. -:anchor_prefix: k8s-api - -[id="{p}-api-reference"] -= API Reference - -.Packages -{{- range $groupVersions }} -- {{ asciidocRenderGVLink . }} -{{- end }} - -{{ range $groupVersions }} -{{ template "gvDetails" . }} -{{ end }} - -{{- end -}} \ No newline at end of file diff --git a/docs/api/autogen/templates/type.tpl b/docs/api/autogen/templates/type.tpl deleted file mode 100644 index c2b7257bcf..0000000000 --- a/docs/api/autogen/templates/type.tpl +++ /dev/null @@ -1,35 +0,0 @@ -{{- define "type" -}} -{{- $type := . -}} -{{- if asciidocShouldRenderType $type -}} - -[id="{{ asciidocTypeID $type | asciidocRenderAnchorID }}"] -==== {{ $type.Name }} {{ if $type.IsAlias }}({{ asciidocRenderTypeLink $type.UnderlyingType }}) {{ end }} - -{{ $type.Doc }} - -{{ if $type.References -}} -.Appears In: -**** -{{- range $type.SortedReferences }} -- {{ asciidocRenderTypeLink . 
}} -{{- end }} -**** -{{- end }} - -{{ if $type.Members -}} -[cols="25a,75a", options="header"] -|=== -| Field | Description -{{ if $type.GVK -}} -| *`apiVersion`* __string__ | `{{ $type.GVK.Group }}/{{ $type.GVK.Version }}` -| *`kind`* __string__ | `{{ $type.GVK.Kind }}` -{{ end -}} - -{{ range $type.Members -}} -| *`{{ .Name }}`* __{{ asciidocRenderType .Type }}__ | {{ template "type_members" . }} -{{ end -}} -|=== -{{ end -}} - -{{- end -}} -{{- end -}} \ No newline at end of file diff --git a/docs/api/autogen/templates/type_members.tpl b/docs/api/autogen/templates/type_members.tpl deleted file mode 100644 index b2729fd399..0000000000 --- a/docs/api/autogen/templates/type_members.tpl +++ /dev/null @@ -1,8 +0,0 @@ -{{- define "type_members" -}} -{{- $field := . -}} -{{- if eq $field.Name "metadata" -}} -Refer to Kubernetes API documentation for fields of `metadata`. -{{ else -}} -{{ $field.Doc }} -{{- end -}} -{{- end -}} \ No newline at end of file diff --git a/docs/api/kubeflow.org_v1_generated.asciidoc b/docs/api/kubeflow.org_v1_generated.asciidoc deleted file mode 100644 index 840b7485e9..0000000000 --- a/docs/api/kubeflow.org_v1_generated.asciidoc +++ /dev/null @@ -1,887 +0,0 @@ -// Generated documentation. Please do not edit. 
-:anchor_prefix: k8s-api - -[id="{p}-api-reference"] -= API Reference - -.Packages -- xref:{anchor_prefix}-kubeflow-org-v1[$$kubeflow.org/v1$$] - - -[id="{anchor_prefix}-kubeflow-org-v1"] -== kubeflow.org/v1 - -Package v1 contains API Schema definitions for the kubeflow.org v1 API group - -.Resource Types -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjob[$$JAXJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjoblist[$$JAXJobList$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijob[$$MPIJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijoblist[$$MPIJobList$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejob[$$PaddleJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejoblist[$$PaddleJobList$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjob[$$PyTorchJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjoblist[$$PyTorchJobList$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjob[$$TFJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjoblist[$$TFJobList$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjob[$$XGBoostJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjoblist[$$XGBoostJobList$$] - - -=== Definitions - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-cleanpodpolicy"] -==== CleanPodPolicy (string) - -CleanPodPolicy describes how to deal with pods when the job is finished. 
- -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijobspec[$$MPIJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-runpolicy[$$RunPolicy$$] -**** - - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-elasticpolicy"] -==== ElasticPolicy - - - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjobspec[$$PyTorchJobSpec$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`minReplicas`* __integer__ | minReplicas is the lower limit for the number of replicas to which the training job -can scale down. It defaults to null. -| *`maxReplicas`* __integer__ | upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. -| *`rdzvBackend`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-rdzvbackend[$$RDZVBackend$$]__ | -| *`rdzvPort`* __integer__ | -| *`rdzvHost`* __string__ | -| *`rdzvId`* __string__ | -| *`rdzvConf`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-rdzvconf[$$RDZVConf$$] array__ | RDZVConf contains additional rendezvous configuration (=,=,...). -| *`standalone`* __boolean__ | Start a local standalone rendezvous backend that is represented by a C10d TCP store -on port 29400. Useful when launching single-node, multi-worker job. If specified ---rdzv_backend, --rdzv_endpoint, --rdzv_id are auto-assigned; any explicitly set values -are ignored. -| *`nProcPerNode`* __integer__ | Number of workers per node; supported values: [auto, cpu, gpu, int]. -Deprecated: This API is deprecated in v1.7+ -Use .spec.nprocPerNode instead. 
-| *`maxRestarts`* __integer__ | -| *`metrics`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#metricspec-v2-autoscaling[$$MetricSpec$$] array__ | Metrics contains the specifications which are used to calculate the -desired replica count (the maximum replica count across all metrics will -be used). The desired replica count is calculated with multiplying the -ratio between the target value and the current value by the current -number of pods. Ergo, metrics used must decrease as the pod count is -increased, and vice-versa. See the individual metric source types for -more information about how each type of metric must respond. -If not set, the HPA will not be created. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjob"] -==== JAXJob - -JAXJob Represents a JAXJob resource. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjoblist[$$JAXJobList$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `JAXJob` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. 
- -| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjobspec[$$JAXJobSpec$$]__ | Specification of the desired state of the JAXJob. -| *`status`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus[$$JobStatus$$]__ | Most recently observed status of the JAXJob. -Read-only (modified by the system). -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjoblist"] -==== JAXJobList - -JAXJobList is a list of JAXJobs. - - - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `JAXJobList` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjob[$$JAXJob$$] array__ | List of JAXJobs. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjobspec"] -==== JAXJobSpec - -JAXJobSpec is a desired state description of the JAXJob. 
- -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjob[$$JAXJob$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`runPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-runpolicy[$$RunPolicy$$]__ | RunPolicy encapsulates various runtime policies of the distributed training -job, for example how to clean up resources and how long the job can stay -active. -| *`jaxReplicaSpecs`* __object (keys:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicatype[$$ReplicaType$$], values:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicaspec[$$ReplicaSpec$$])__ | A map of JAXReplicaType (type) to ReplicaSpec (value). Specifies the JAX cluster configuration. -For example, - { - "Worker": JAXReplicaSpec, - } -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobcondition"] -==== JobCondition - -JobCondition describes the state of the job at a certain point. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus[$$JobStatus$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`type`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobconditiontype[$$JobConditionType$$]__ | Type of job condition. -| *`status`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#conditionstatus-v1-core[$$ConditionStatus$$]__ | Status of the condition, one of True, False, Unknown. -| *`reason`* __string__ | The reason for the condition's last transition. -| *`message`* __string__ | A human readable message indicating details about the transition. 
-| *`lastUpdateTime`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#time-v1-meta[$$Time$$]__ | The last time this condition was updated. -| *`lastTransitionTime`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#time-v1-meta[$$Time$$]__ | Last time the condition transitioned from one status to another. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobconditiontype"] -==== JobConditionType (string) - -JobConditionType defines all kinds of types of JobStatus. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobcondition[$$JobCondition$$] -**** - - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus"] -==== JobStatus - -JobStatus represents the current observed state of the training Job. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjob[$$JAXJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijob[$$MPIJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejob[$$PaddleJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjob[$$PyTorchJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjob[$$TFJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjob[$$XGBoostJob$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`conditions`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobcondition[$$JobCondition$$] array__ | Conditions is an array of current observed job conditions. 
-| *`replicaStatuses`* __object (keys:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicatype[$$ReplicaType$$], values:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicastatus[$$ReplicaStatus$$])__ | ReplicaStatuses is map of ReplicaType and ReplicaStatus, -specifies the status of each replica. -| *`startTime`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#time-v1-meta[$$Time$$]__ | Represents time when the job was acknowledged by the job controller. -It is not guaranteed to be set in happens-before order across separate operations. -It is represented in RFC3339 form and is in UTC. -| *`completionTime`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#time-v1-meta[$$Time$$]__ | Represents time when the job was completed. It is not guaranteed to -be set in happens-before order across separate operations. -It is represented in RFC3339 form and is in UTC. -| *`lastReconcileTime`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#time-v1-meta[$$Time$$]__ | Represents last time when the job was reconciled. It is not guaranteed to -be set in happens-before order across separate operations. -It is represented in RFC3339 form and is in UTC. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijob"] -==== MPIJob - - - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijoblist[$$MPIJobList$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `MPIJob` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. 
-More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijobspec[$$MPIJobSpec$$]__ | -| *`status`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus[$$JobStatus$$]__ | -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijoblist"] -==== MPIJobList - - - - - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `MPIJobList` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. 
-More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijob[$$MPIJob$$] array__ | -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijobspec"] -==== MPIJobSpec - - - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijob[$$MPIJob$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`slotsPerWorker`* __integer__ | Specifies the number of slots per worker used in hostfile. -Defaults to 1. -| *`cleanPodPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-cleanpodpolicy[$$CleanPodPolicy$$]__ | CleanPodPolicy defines the policy that whether to kill pods after the job completes. -Defaults to None. -| *`mpiReplicaSpecs`* __object (keys:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicatype[$$ReplicaType$$], values:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicaspec[$$ReplicaSpec$$])__ | `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that -specify the MPI replicas to run. -| *`mainContainer`* __string__ | MainContainer specifies name of the main container which -executes the MPI code. -| *`runPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-runpolicy[$$RunPolicy$$]__ | `RunPolicy` encapsulates various runtime policies of the distributed training -job, for example how to clean up resources and how long the job can stay -active. 
-|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddleelasticpolicy"] -==== PaddleElasticPolicy - - - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejobspec[$$PaddleJobSpec$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`minReplicas`* __integer__ | minReplicas is the lower limit for the number of replicas to which the training job -can scale down. It defaults to null. -| *`maxReplicas`* __integer__ | upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. -| *`maxRestarts`* __integer__ | MaxRestarts is the limit for restart times of pods in elastic mode. -| *`metrics`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#metricspec-v2-autoscaling[$$MetricSpec$$] array__ | Metrics contains the specifications which are used to calculate the -desired replica count (the maximum replica count across all metrics will -be used). The desired replica count is calculated with multiplying the -ratio between the target value and the current value by the current -number of pods. Ergo, metrics used must decrease as the pod count is -increased, and vice-versa. See the individual metric source types for -more information about how each type of metric must respond. -If not set, the HPA will not be created. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejob"] -==== PaddleJob - -PaddleJob Represents a PaddleJob resource. 
- -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejoblist[$$PaddleJobList$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `PaddleJob` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejobspec[$$PaddleJobSpec$$]__ | Specification of the desired state of the PaddleJob. -| *`status`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus[$$JobStatus$$]__ | Most recently observed status of the PaddleJob. -Read-only (modified by the system). -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejoblist"] -==== PaddleJobList - -PaddleJobList is a list of PaddleJobs. - - - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `PaddleJobList` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. 
-Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejob[$$PaddleJob$$] array__ | List of PaddleJobs. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejobspec"] -==== PaddleJobSpec - -PaddleJobSpec is a desired state description of the PaddleJob. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejob[$$PaddleJob$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`runPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-runpolicy[$$RunPolicy$$]__ | RunPolicy encapsulates various runtime policies of the distributed training -job, for example how to clean up resources and how long the job can stay -active. -| *`elasticPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddleelasticpolicy[$$PaddleElasticPolicy$$]__ | ElasticPolicy holds the elastic policy for paddle job. 
-| *`paddleReplicaSpecs`* __object (keys:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicatype[$$ReplicaType$$], values:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicaspec[$$ReplicaSpec$$])__ | A map of PaddleReplicaType (type) to ReplicaSpec (value). Specifies the Paddle cluster configuration. -For example, - { - "Master": PaddleReplicaSpec, - "Worker": PaddleReplicaSpec, - } -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjob"] -==== PyTorchJob - -PyTorchJob Represents a PyTorchJob resource. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjoblist[$$PyTorchJobList$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `PyTorchJob` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjobspec[$$PyTorchJobSpec$$]__ | Specification of the desired state of the PyTorchJob. 
-| *`status`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus[$$JobStatus$$]__ | Most recently observed status of the PyTorchJob. -Read-only (modified by the system). -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjoblist"] -==== PyTorchJobList - -PyTorchJobList is a list of PyTorchJobs. - - - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `PyTorchJobList` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjob[$$PyTorchJob$$] array__ | List of PyTorchJobs. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjobspec"] -==== PyTorchJobSpec - -PyTorchJobSpec is a desired state description of the PyTorchJob. 
- -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjob[$$PyTorchJob$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`runPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-runpolicy[$$RunPolicy$$]__ | RunPolicy encapsulates various runtime policies of the distributed training -job, for example how to clean up resources and how long the job can stay -active. -| *`elasticPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-elasticpolicy[$$ElasticPolicy$$]__ | -| *`pytorchReplicaSpecs`* __object (keys:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicatype[$$ReplicaType$$], values:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicaspec[$$ReplicaSpec$$])__ | A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. -For example, - { - "Master": PyTorchReplicaSpec, - "Worker": PyTorchReplicaSpec, - } -| *`nprocPerNode`* __string__ | Number of workers per node; supported values: [auto, cpu, gpu, int]. -For more, https://github.com/pytorch/pytorch/blob/26f7f470df64d90e092081e39507e4ac751f55d6/torch/distributed/run.py#L629-L658. -Defaults to auto. 
-|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-rdzvbackend"] -==== RDZVBackend (string) - - - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-elasticpolicy[$$ElasticPolicy$$] -**** - - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-rdzvconf"] -==== RDZVConf - - - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-elasticpolicy[$$ElasticPolicy$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`key`* __string__ | -| *`value`* __string__ | -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicaspec"] -==== ReplicaSpec - -ReplicaSpec is a description of the replica - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjobspec[$$JAXJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijobspec[$$MPIJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejobspec[$$PaddleJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjobspec[$$PyTorchJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjobspec[$$TFJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjobspec[$$XGBoostJobSpec$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`replicas`* __integer__ | Replicas is the desired number of replicas of the given template. -If unspecified, defaults to 1. 
-| *`template`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#podtemplatespec-v1-core[$$PodTemplateSpec$$]__ | Template is the object that describes the pod that -will be created for this replica. RestartPolicy in PodTemplateSpec -will be overide by RestartPolicy in ReplicaSpec -| *`restartPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-restartpolicy[$$RestartPolicy$$]__ | Restart policy for all replicas within the job. -One of Always, OnFailure, Never and ExitCode. -Default to Never. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicastatus"] -==== ReplicaStatus - -ReplicaStatus represents the current observed state of the replica. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus[$$JobStatus$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`active`* __integer__ | The number of actively running pods. -| *`succeeded`* __integer__ | The number of pods which reached phase Succeeded. -| *`failed`* __integer__ | The number of pods which reached phase Failed. -| *`labelSelector`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#labelselector-v1-meta[$$LabelSelector$$]__ | Deprecated: Use Selector instead -| *`selector`* __string__ | A Selector is a label query over a set of resources. The result of matchLabels and -matchExpressions are ANDed. An empty Selector matches all objects. A null -Selector matches no objects. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicatype"] -==== ReplicaType (string) - -ReplicaType represents the type of the replica. Each operator needs to define its -own set of ReplicaTypes. 
- -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjobspec[$$JAXJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus[$$JobStatus$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijobspec[$$MPIJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejobspec[$$PaddleJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjobspec[$$PyTorchJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjobspec[$$TFJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjobspec[$$XGBoostJobSpec$$] -**** - - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-restartpolicy"] -==== RestartPolicy (string) - -RestartPolicy describes how the replicas should be restarted. -Only one of the following restart policies may be specified. -If none of the following policies is specified, the default one -is RestartPolicyAlways. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicaspec[$$ReplicaSpec$$] -**** - - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-runpolicy"] -==== RunPolicy - -RunPolicy encapsulates various runtime policies of the distributed training -job, for example how to clean up resources and how long the job can stay -active. 
- -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jaxjobspec[$$JAXJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-mpijobspec[$$MPIJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-paddlejobspec[$$PaddleJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-pytorchjobspec[$$PyTorchJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjobspec[$$TFJobSpec$$] -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjobspec[$$XGBoostJobSpec$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`cleanPodPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-cleanpodpolicy[$$CleanPodPolicy$$]__ | CleanPodPolicy defines the policy to kill pods after the job completes. -Default to None. -| *`ttlSecondsAfterFinished`* __integer__ | TTLSecondsAfterFinished is the TTL to clean up jobs. -It may take extra ReconcilePeriod seconds for the cleanup, since -reconcile gets called periodically. -Default to infinite. -| *`activeDeadlineSeconds`* __integer__ | Specifies the duration in seconds relative to the startTime that the job may be active -before the system tries to terminate it; value must be positive integer. -| *`backoffLimit`* __integer__ | Optional number of retries before marking this job failed. -| *`schedulingPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-schedulingpolicy[$$SchedulingPolicy$$]__ | SchedulingPolicy defines the policy related to scheduling, e.g. gang-scheduling -| *`suspend`* __boolean__ | suspend specifies whether the Job controller should create Pods or not. -If a Job is created with suspend set to true, no Pods are created by -the Job controller. 
If a Job is suspended after creation (i.e. the -flag goes from false to true), the Job controller will delete all -active Pods and PodGroups associated with this Job. -Users must design their workload to gracefully handle this. -Suspending a Job will reset the StartTime field of the Job. - - -Defaults to false. -| *`managedBy`* __string__ | ManagedBy is used to indicate the controller or entity that manages a job. -The value must be either an empty, 'kubeflow.org/training-operator' or -'kueue.x-k8s.io/multikueue'. -The training-operator reconciles a job which doesn't have this -field at all or the field value is the reserved string -'kubeflow.org/training-operator', but delegates reconciling the job -with 'kueue.x-k8s.io/multikueue' to the Kueue. -The field is immutable. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-schedulingpolicy"] -==== SchedulingPolicy - -SchedulingPolicy encapsulates various scheduling policies of the distributed training -job, for example `minAvailable` for gang-scheduling. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-runpolicy[$$RunPolicy$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`minAvailable`* __integer__ | -| *`queue`* __string__ | -| *`minResources`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#quantity-resource-api[$$Quantity$$]__ | -| *`priorityClass`* __string__ | -| *`scheduleTimeoutSeconds`* __integer__ | -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-successpolicy"] -==== SuccessPolicy (string) - -SuccessPolicy is the success policy. 
- -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjobspec[$$TFJobSpec$$] -**** - - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjob"] -==== TFJob - -TFJob represents a TFJob resource. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjoblist[$$TFJobList$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `TFJob` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjobspec[$$TFJobSpec$$]__ | Specification of the desired state of the TFJob. -| *`status`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus[$$JobStatus$$]__ | Most recently observed status of the TFJob. -Populated by the system. -Read-only. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjoblist"] -==== TFJobList - -TFJobList is a list of TFJobs. 
- - - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `TFJobList` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjob[$$TFJob$$] array__ | List of TFJobs. -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjobspec"] -==== TFJobSpec - -TFJobSpec is a desired state description of the TFJob. - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-tfjob[$$TFJob$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`runPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-runpolicy[$$RunPolicy$$]__ | RunPolicy encapsulates various runtime policies of the distributed training -job, for example how to clean up resources and how long the job can stay -active. 
-| *`successPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-successpolicy[$$SuccessPolicy$$]__ | SuccessPolicy defines the policy to mark the TFJob as succeeded. -Default to "", using the default rules. -| *`tfReplicaSpecs`* __object (keys:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicatype[$$ReplicaType$$], values:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicaspec[$$ReplicaSpec$$])__ | A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. -For example, - { - "PS": ReplicaSpec, - "Worker": ReplicaSpec, - } -| *`enableDynamicWorker`* __boolean__ | A switch to enable dynamic worker -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjob"] -==== XGBoostJob - -XGBoostJob is the Schema for the xgboostjobs API - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjoblist[$$XGBoostJobList$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `XGBoostJob` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. 
-More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. - -| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjobspec[$$XGBoostJobSpec$$]__ | -| *`status`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-jobstatus[$$JobStatus$$]__ | -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjoblist"] -==== XGBoostJobList - -XGBoostJobList contains a list of XGBoostJob - - - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`apiVersion`* __string__ | `kubeflow.org/v1` -| *`kind`* __string__ | `XGBoostJobList` -| *`kind`* __string__ | Kind is a string value representing the REST resource this object represents. -Servers may infer this from the endpoint the client submits requests to. -Cannot be updated. -In CamelCase. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds -| *`apiVersion`* __string__ | APIVersion defines the versioned schema of this representation of an object. -Servers should convert recognized schemas to the latest internal value, and -may reject unrecognized values. -More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources -| *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. 
- -| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjob[$$XGBoostJob$$] array__ | -|=== - - -[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjobspec"] -==== XGBoostJobSpec - -XGBoostJobSpec defines the desired state of XGBoostJob - -.Appears In: -**** -- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-xgboostjob[$$XGBoostJob$$] -**** - -[cols="25a,75a", options="header"] -|=== -| Field | Description -| *`runPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-runpolicy[$$RunPolicy$$]__ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster -Important: Run "make" to regenerate code after modifying this file -| *`xgbReplicaSpecs`* __object (keys:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicatype[$$ReplicaType$$], values:xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-kubeflow-org-v1-replicaspec[$$ReplicaSpec$$])__ | -|=== - - diff --git a/examples/jax/cpu-demo/Dockerfile b/examples/jax/cpu-demo/Dockerfile deleted file mode 100644 index b8255313c1..0000000000 --- a/examples/jax/cpu-demo/Dockerfile +++ /dev/null @@ -1,26 +0,0 @@ -FROM python:3.12 - -RUN pip install jax absl-py kubernetes - -RUN apt-get update && apt-get install -y \ - build-essential \ - cmake \ - git \ - libgoogle-glog-dev \ - libgflags-dev \ - libprotobuf-dev \ - protobuf-compiler \ - && rm -rf /var/lib/apt/lists/* - -RUN git clone https://github.com/facebookincubator/gloo.git \ - && cd gloo \ - && git checkout 43b7acbf372cdce14075f3526e39153b7e433b53 \ - && mkdir build \ - && cd build \ - && cmake ../ \ - && make \ - && make install - -WORKDIR /app - -ADD train.py /app diff --git a/examples/jax/cpu-demo/demo.yaml b/examples/jax/cpu-demo/demo.yaml deleted file mode 100644 index bffd3cc16f..0000000000 --- a/examples/jax/cpu-demo/demo.yaml +++ /dev/null @@ 
-1,19 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: JAXJob -metadata: - name: jaxjob-simple - namespace: kubeflow -spec: - jaxReplicaSpecs: - Worker: - replicas: 2 - restartPolicy: OnFailure - template: - spec: - containers: - - name: jax - image: docker.io/kubeflow/jaxjob-simple:latest - command: - - "python3" - - "train.py" - imagePullPolicy: Always diff --git a/examples/jax/cpu-demo/train.py b/examples/jax/cpu-demo/train.py deleted file mode 100644 index de8b5ec2c7..0000000000 --- a/examples/jax/cpu-demo/train.py +++ /dev/null @@ -1,43 +0,0 @@ -# example ref: -# https://jax.readthedocs.io/en/latest/multi_process.html#running-multi-process-computations -# https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/tutorials-and-examples/gpu-examples/a100-jax/train.py # noqa - -import os -import socket - -import jax -from absl import app - -jax.config.update("jax_cpu_collectives_implementation", "gloo") - - -def _main(argv): - - process_id = int(os.getenv("PROCESS_ID")) - num_processes = int(os.getenv("NUM_PROCESSES")) - coordinator_address = os.getenv("COORDINATOR_ADDRESS") - coordinator_port = int(os.getenv("COORDINATOR_PORT")) - coordinator_address = f"{coordinator_address}:{coordinator_port}" - - jax.distributed.initialize( - coordinator_address=coordinator_address, - num_processes=num_processes, - process_id=process_id, - ) - - print( - f"JAX process {jax.process_index()}/{jax.process_count() - 1} initialized on " - f"{socket.gethostname()}" - ) - print(f"JAX global devices:{jax.devices()}") - print(f"JAX local devices:{jax.local_devices()}") - - print(f"JAX device count:{jax.device_count()}") - print(f"JAX local device count:{jax.local_device_count()}") - - xs = jax.numpy.ones(jax.local_device_count()) - print(jax.pmap(lambda x: jax.lax.psum(x, "i"), axis_name="i")(xs)) - - -if __name__ == "__main__": - app.run(_main) diff --git a/examples/mpi/tensorflow-mnist-elastic.yaml b/examples/mpi/tensorflow-mnist-elastic.yaml deleted file mode 100644 index 
b28a2ad001..0000000000 --- a/examples/mpi/tensorflow-mnist-elastic.yaml +++ /dev/null @@ -1,43 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: MPIJob -metadata: - name: tensorflow-mnist-elastic -spec: - slotsPerWorker: 1 - cleanPodPolicy: Running - mpiReplicaSpecs: - Launcher: - replicas: 1 - template: - spec: - containers: - - image: horovod/horovod:0.20.0-tf2.3.0-torch1.6.0-mxnet1.5.0-py3.7-cpu - name: mpi-launcher - command: - - horovodrun - args: - - -np - - "2" - - --min-np - - "1" - - --max-np - - "3" - - --host-discovery-script - - /etc/mpi/discover_hosts.sh - - python - - /examples/elastic/tensorflow2_mnist_elastic.py - resources: - limits: - cpu: 1 - memory: 2Gi - Worker: - replicas: 2 - template: - spec: - containers: - - image: horovod/horovod:0.20.0-tf2.3.0-torch1.6.0-mxnet1.5.0-py3.7-cpu - name: mpi-worker - resources: - limits: - cpu: 2 - memory: 4Gi diff --git a/examples/mpi/tensorflow-mnist.yaml b/examples/mpi/tensorflow-mnist.yaml deleted file mode 100644 index af6c9fc92b..0000000000 --- a/examples/mpi/tensorflow-mnist.yaml +++ /dev/null @@ -1,53 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: MPIJob -metadata: - name: tensorflow-mnist -spec: - slotsPerWorker: 1 - runPolicy: - cleanPodPolicy: Running - mpiReplicaSpecs: - Launcher: - replicas: 1 - template: - spec: - containers: - - image: horovod/horovod:0.28.1 - name: mpi - command: - - mpirun - args: - - -np - - "2" - - --allow-run-as-root - - -bind-to - - none - - -map-by - - slot - - -x - - LD_LIBRARY_PATH - - -x - - PATH - - -mca - - pml - - ob1 - - -mca - - btl - - ^openib - - python - - /horovod/examples/tensorflow2/tensorflow2_mnist.py - resources: - limits: - cpu: 1 - memory: 2Gi - Worker: - replicas: 2 - template: - spec: - containers: - - image: horovod/horovod:0.28.1 - name: mpi - resources: - limits: - cpu: 2 - memory: 4Gi diff --git a/examples/paddlepaddle/simple-cpu.yaml b/examples/paddlepaddle/simple-cpu.yaml deleted file mode 100644 index 2a2bd8714d..0000000000 --- 
a/examples/paddlepaddle/simple-cpu.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: PaddleJob -metadata: - name: paddle-simple-cpu - namespace: kubeflow -spec: - paddleReplicaSpecs: - Worker: - replicas: 2 - restartPolicy: OnFailure - template: - spec: - containers: - - name: paddle - image: registry.baidubce.com/paddlepaddle/paddle:2.4.0rc0-cpu - command: - - python - args: - - "-m" - - paddle.distributed.launch - - "run_check" - ports: - - containerPort: 37777 - name: master - imagePullPolicy: Always diff --git a/examples/paddlepaddle/simple-gpu.yaml b/examples/paddlepaddle/simple-gpu.yaml deleted file mode 100644 index e726536bd7..0000000000 --- a/examples/paddlepaddle/simple-gpu.yaml +++ /dev/null @@ -1,35 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: PaddleJob -metadata: - name: paddle-simple-gpu - namespace: kubeflow -spec: - paddleReplicaSpecs: - Worker: - replicas: 2 - restartPolicy: OnFailure - template: - spec: - containers: - - name: paddle - image: registry.baidubce.com/paddlepaddle/paddle:2.4.0rc0-gpu-cuda11.2-cudnn8.1-trt8.0 - command: - - python - args: - - "-m" - - paddle.distributed.launch - - "run_check" - ports: - - containerPort: 37777 - name: master - imagePullPolicy: Always - resources: - limits: - nvidia.com/gpu: 2 - volumeMounts: - - mountPath: /dev/shm - name: dshm - volumes: - - name: dshm - emptyDir: - medium: Memory diff --git a/examples/pytorch/README.md b/examples/pytorch/README.md deleted file mode 100644 index 0db74af091..0000000000 --- a/examples/pytorch/README.md +++ /dev/null @@ -1,39 +0,0 @@ -## Installation & deployment tips -1. You need to configure your node to utilize GPU. 
This can be done the following way: - * Install [nvidia-docker2](https://github.com/NVIDIA/nvidia-docker) - * Connect to your MasterNode and set nvidia as the default run in `/etc/docker/daemon.json`: - ``` - { - "default-runtime": "nvidia", - "runtimes": { - "nvidia": { - "path": "/usr/bin/nvidia-container-runtime", - "runtimeArgs": [] - } - } - } - ``` - * After that deploy nvidia-daemon to kubernetes: - ```bash - kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml - ``` - -2. NVIDIA GPUs can now be consumed via container level resource requirements using the resource name nvidia.com/gpu: - ``` - resources: - limits: - nvidia.com/gpu: 2 # requesting 2 GPUs - ``` - -3. Building image. Each example has prebuilt images that are stored on google cloud resources (GCR). If you want to create your own image we recommend using dockerhub. Each example has its own Dockerfile that we strongly advise to use. To build your custom image follow instruction on [TechRepublic](https://www.techrepublic.com/article/how-to-create-a-docker-image-and-push-it-to-docker-hub/). - -4. To deploy your job we recommend using official [kubeflow documentation](https://www.kubeflow.org/docs/guides/components/pytorch/). Each example has example yaml files for two versions of apis. Feel free to modify them, e.g. image or number of GPUs. - -**Note**: PyTorch job doesn’t work in a user namespace by default because of Istio [automatic sidecar injection](https://istio.io/v1.3/docs/setup/additional-setup/sidecar-injection/#automatic-sidecar-injection). In order to get it running, it needs annotation sidecar.istio.io/inject: "false" to disable it for either PyTorch pods or namespace. 
For example: - -```yaml -template: - metadata: - annotations: - sidecar.istio.io/inject: "false" -``` diff --git a/examples/pytorch/cpu-demo/Dockerfile b/examples/pytorch/cpu-demo/Dockerfile deleted file mode 100644 index 1e6204bdef..0000000000 --- a/examples/pytorch/cpu-demo/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM python:3.8 - -RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu - -WORKDIR / - -COPY demo.py . diff --git a/examples/pytorch/cpu-demo/README.MD b/examples/pytorch/cpu-demo/README.MD deleted file mode 100644 index 088dc122f5..0000000000 --- a/examples/pytorch/cpu-demo/README.MD +++ /dev/null @@ -1,7 +0,0 @@ -## Demo - -This demo presents the usage of `torchrun` with training-operator. - -> Make the `nprocPerNode` part consistent with the gpu resource declaration in GPU context. - -The image used in demo.yaml is constructed with the Dockerfile provided alongside. diff --git a/examples/pytorch/cpu-demo/demo.py b/examples/pytorch/cpu-demo/demo.py deleted file mode 100644 index 3d2d5dc83f..0000000000 --- a/examples/pytorch/cpu-demo/demo.py +++ /dev/null @@ -1,11 +0,0 @@ -import torch - -torch.distributed.init_process_group(init_method="env://") -rank = torch.distributed.get_rank() -world_size = torch.distributed.get_world_size() -print(f"rank {rank} world_size {world_size}") -a = torch.tensor([1]) -torch.distributed.all_reduce(a) -print(f"rank {rank} world_size {world_size} result {a}") -torch.distributed.barrier() -print(f"rank {rank} world_size {world_size}") diff --git a/examples/pytorch/cpu-demo/demo.yaml b/examples/pytorch/cpu-demo/demo.yaml deleted file mode 100644 index fa9960f029..0000000000 --- a/examples/pytorch/cpu-demo/demo.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: PyTorchJob -metadata: - name: torchrun-cpu -spec: - nprocPerNode: "2" - pytorchReplicaSpecs: - Master: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - 
image: pytorch-cpu:py3.8 - command: - - "torchrun" - - "demo.py" - Worker: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: pytorch-cpu:py3.8 - command: - - "torchrun" - - "demo.py" diff --git a/examples/pytorch/deepspeed-demo/Dockerfile b/examples/pytorch/deepspeed-demo/Dockerfile deleted file mode 100644 index 51201bdf0f..0000000000 --- a/examples/pytorch/deepspeed-demo/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM deepspeed/deepspeed:v072_torch112_cu117 - -RUN apt update -RUN apt install -y ninja-build - -WORKDIR / -COPY requirements.txt . -COPY train_bert_ds.py . - -RUN pip install -r requirements.txt -RUN mkdir -p /root/deepspeed_data diff --git a/examples/pytorch/deepspeed-demo/README.md b/examples/pytorch/deepspeed-demo/README.md deleted file mode 100644 index ed3a522162..0000000000 --- a/examples/pytorch/deepspeed-demo/README.md +++ /dev/null @@ -1,37 +0,0 @@ -## Training a Masked Language Model with PyTorch and DeepSpeed - -This folder contains an example of training a Masked Language Model with PyTorch and DeepSpeed. - -The python script used to train BERT with PyTorch and DeepSpeed. For more information, please refer to the [DeepSpeedExamples](https://github.com/microsoft/DeepSpeedExamples/blob/master/training/HelloDeepSpeed/README.md). - -DeepSpeed can be deployed by different launchers such as torchrun, the deepspeed launcher, or Accelerate. -See [deepspeed](https://huggingface.co/docs/transformers/main/en/deepspeed?deploy=multi-GPU&pass-config=path+to+file&multinode=torchrun#deployment). - -This guide will show you how to deploy DeepSpeed with the `torchrun` launcher. -The simplest way to quickly reproduce the following is to switch to the DeepSpeedExamples commit: -```shell -git clone https://github.com/microsoft/DeepSpeedExamples.git -cd DeepSpeedExamples -git checkout efacebb -``` - -The script train_bert_ds.py is located in the DeepSpeedExamples/HelloDeepSpeed/ directory. 
-Since the script is not launched using the deepspeed launcher, it needs to read the local_rank from the environment. -The following content has been added at line 670: -``` -local_rank = int(os.getenv('LOCAL_RANK', '-1')) -``` - -### Build Image - -The default image name and tag is `kubeflow/pytorch-deepspeed-demo:latest`. - -```shell -docker build -f Dockerfile -t kubeflow/pytorch-deepspeed-demo:latest ./ -``` - -### Create the PyTorchJob with DeepSpeed example - -```shell -kubectl create -f pytorch_deepspeed_demo.yaml -``` diff --git a/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml b/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml deleted file mode 100644 index 25a07e61c3..0000000000 --- a/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml +++ /dev/null @@ -1,38 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: PyTorchJob -metadata: - name: pytorch-deepspeed-demo -spec: - pytorchReplicaSpecs: - Master: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-deepspeed-demo:latest - command: - - torchrun - - /train_bert_ds.py - - --checkpoint_dir - - /root/deepspeed_data - resources: - limits: - nvidia.com/gpu: 1 - Worker: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-deepspeed-demo:latest - command: - - torchrun - - /train_bert_ds.py - - --checkpoint_dir - - /root/deepspeed_data - resources: - limits: - nvidia.com/gpu: 1 diff --git a/examples/pytorch/deepspeed-demo/requirements.txt b/examples/pytorch/deepspeed-demo/requirements.txt deleted file mode 100644 index cdb0321466..0000000000 --- a/examples/pytorch/deepspeed-demo/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -datasets==2.15.0 -transformers==4.38.0 -fire==0.4.0 -pytz==2021.1 -loguru==0.5.3 -sh==1.14.2 diff --git a/examples/pytorch/deepspeed-demo/train_bert_ds.py b/examples/pytorch/deepspeed-demo/train_bert_ds.py deleted file mode 100644 
index a8d5b8413f..0000000000 --- a/examples/pytorch/deepspeed-demo/train_bert_ds.py +++ /dev/null @@ -1,829 +0,0 @@ -""" -Modified version of train_bert.py that adds DeepSpeed -""" - -import datetime -import json -import logging -import os -import pathlib -import random -import re -import string -from functools import partial -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar, Union - -import datasets -import deepspeed -import fire -import loguru -import numpy as np -import pytz -import sh -import torch -import torch.nn as nn -from torch.utils.data import DataLoader, Dataset -from torch.utils.tensorboard import SummaryWriter -from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast -from transformers.models.roberta import RobertaConfig, RobertaModel -from transformers.models.roberta.modeling_roberta import ( - RobertaLMHead, - RobertaPreTrainedModel, -) - - -def is_rank_0() -> bool: - return int(os.environ.get("RANK", "0")) == 0 - - -# Logging Functions - -logger = loguru.logger - - -def log_dist(message: str, ranks: List[int] = [], level: int = logging.INFO) -> None: - """Log messages for specified ranks only""" - my_rank = int(os.environ.get("RANK", "0")) - if my_rank in ranks: - if level == logging.INFO: - logger.info(f"[Rank {my_rank}] {message}") - if level == logging.ERROR: - logger.error(f"[Rank {my_rank}] {message}") - if level == logging.DEBUG: - logger.debug(f"[Rank {my_rank}] {message}") - - -# Dataset Creation Related Functions - -TokenizerType = Union[PreTrainedTokenizer, PreTrainedTokenizerFast] - - -def collate_function( - batch: List[Tuple[List[int], List[int]]], pad_token_id: int -) -> Dict[str, torch.Tensor]: - """Collect a list of masked token indices, and labels, and - batch them, padding to max length in the batch. 
- """ - max_length = max(len(token_ids) for token_ids, _ in batch) - padded_token_ids = [ - token_ids + [pad_token_id for _ in range(0, max_length - len(token_ids))] - for token_ids, _ in batch - ] - padded_labels = [ - labels + [pad_token_id for _ in range(0, max_length - len(labels))] - for _, labels in batch - ] - src_tokens = torch.LongTensor(padded_token_ids) - tgt_tokens = torch.LongTensor(padded_labels) - attention_mask = src_tokens.ne(pad_token_id).type_as(src_tokens) - return { - "src_tokens": src_tokens, - "tgt_tokens": tgt_tokens, - "attention_mask": attention_mask, - } - - -def masking_function( - text: str, - tokenizer: TokenizerType, - mask_prob: float, - random_replace_prob: float, - unmask_replace_prob: float, - max_length: int, -) -> Tuple[List[int], List[int]]: - """Given a text string, randomly mask wordpieces for Bert MLM - training. - - Args: - text (str): - The input text - tokenizer (TokenizerType): - The tokenizer for tokenization - mask_prob (float): - What fraction of tokens to mask - random_replace_prob (float): - Of the masked tokens, how many should be replaced with - random tokens (improves performance) - unmask_replace_prob (float): - Of the masked tokens, how many should be replaced with - the original token (improves performance) - max_length (int): - The maximum sequence length to consider. 
Note that for - Bert style models, this is a function of the number of - positional embeddings you learn - - Returns: - Tuple[List[int], List[int]]: - The masked token ids (based on the tokenizer passed), - and the output labels (padded with `tokenizer.pad_token_id`) - """ - # Note: By default, encode does add the BOS and EOS token - # Disabling that behaviour to make this more clear - tokenized_ids = ( - [tokenizer.bos_token_id] - + tokenizer.encode( - text, add_special_tokens=False, truncation=True, max_length=max_length - 2 - ) - + [tokenizer.eos_token_id] - ) - seq_len = len(tokenized_ids) - tokenized_ids = np.array(tokenized_ids) - subword_mask = np.full(len(tokenized_ids), False) - - # Masking the BOS and EOS token leads to slightly worse performance - low = 1 - high = len(subword_mask) - 1 - mask_choices = np.arange(low, high) - num_subwords_to_mask = max(int((mask_prob * (high - low)) + np.random.rand()), 1) - subword_mask[ - np.random.choice(mask_choices, num_subwords_to_mask, replace=False) - ] = True - - # Create the labels first - labels = np.full(seq_len, tokenizer.pad_token_id) - labels[subword_mask] = tokenized_ids[subword_mask] - - tokenized_ids[subword_mask] = tokenizer.mask_token_id - - # Now of the masked tokens, choose how many to replace with random and how many to unmask - rand_or_unmask_prob = random_replace_prob + unmask_replace_prob - if rand_or_unmask_prob > 0: - rand_or_unmask = subword_mask & ( - np.random.rand(len(tokenized_ids)) < rand_or_unmask_prob - ) - if random_replace_prob == 0: - unmask = rand_or_unmask - rand_mask = None - elif unmask_replace_prob == 0: - unmask = None - rand_mask = rand_or_unmask - else: - unmask_prob = unmask_replace_prob / rand_or_unmask_prob - decision = np.random.rand(len(tokenized_ids)) < unmask_prob - unmask = rand_or_unmask & decision - rand_mask = rand_or_unmask & (~decision) - if unmask is not None: - tokenized_ids[unmask] = labels[unmask] - if rand_mask is not None: - weights = 
np.ones(tokenizer.vocab_size) - weights[tokenizer.all_special_ids] = 0 - probs = weights / weights.sum() - num_rand = rand_mask.sum() - tokenized_ids[rand_mask] = np.random.choice( - tokenizer.vocab_size, num_rand, p=probs - ) - return tokenized_ids.tolist(), labels.tolist() - - -class WikiTextMLMDataset(Dataset): - """A [Map style dataset](https://pytorch.org/docs/stable/data.html) - for iterating over the wikitext dataset. Note that this assumes - the dataset can fit in memory. For larger datasets - you'd want to shard them and use an iterable dataset (eg: see - [Infinibatch](https://github.com/microsoft/infinibatch)) - - Args: - Dataset (datasets.arrow_dataset.Dataset): - The wikitext dataset - masking_function (Callable[[str], Tuple[List[int], List[int]]]) - The masking function. To generate one training instance, - the masking function is applied to the `text` of a dataset - record - - """ - - def __init__( - self, - dataset: datasets.arrow_dataset.Dataset, - masking_function: Callable[[str], Tuple[List[int], List[int]]], - ) -> None: - self.dataset = dataset - self.masking_function = masking_function - - def __len__(self) -> int: - return len(self.dataset) - - def __getitem__(self, idx: int) -> Tuple[List[int], List[int]]: - tokens, labels = self.masking_function(self.dataset[idx]["text"]) - return (tokens, labels) - - -T = TypeVar("T") - - -class InfiniteIterator(object): - def __init__(self, iterable: Iterable[T]) -> None: - self._iterable = iterable - self._iterator = iter(self._iterable) - - def __iter__(self): - return self - - def __next__(self) -> T: - next_item = None - try: - next_item = next(self._iterator) - except StopIteration: - self._iterator = iter(self._iterable) - next_item = next(self._iterator) - return next_item - - -def create_data_iterator( - mask_prob: float, - random_replace_prob: float, - unmask_replace_prob: float, - batch_size: int, - max_seq_length: int = 512, - tokenizer: str = "roberta-base", -) -> InfiniteIterator: - """Create 
the dataloader. - - Args: - mask_prob (float): - Fraction of tokens to mask - random_replace_prob (float): - Fraction of masked tokens to replace with random token - unmask_replace_prob (float): - Fraction of masked tokens to replace with the actual token - batch_size (int): - The batch size of the generated tensors - max_seq_length (int, optional): - The maximum sequence length for the MLM task. Defaults to 512. - tokenizer (str, optional): - The tokenizer to use. Defaults to "roberta-base". - - Returns: - InfiniteIterator: - The torch DataLoader, wrapped in an InfiniteIterator class, to - be able to continuously generate samples - - """ - wikitext_dataset = datasets.load_dataset( - "Salesforce/wikitext", "wikitext-2-v1", split="train" - ) - wikitext_dataset = wikitext_dataset.filter(lambda record: record["text"] != "").map( - lambda record: {"text": record["text"].rstrip("\n")} - ) - tokenizer = AutoTokenizer.from_pretrained(tokenizer) - masking_function_partial = partial( - masking_function, - tokenizer=tokenizer, - mask_prob=mask_prob, - random_replace_prob=random_replace_prob, - unmask_replace_prob=unmask_replace_prob, - max_length=max_seq_length, - ) - dataset = WikiTextMLMDataset(wikitext_dataset, masking_function_partial) - collate_fn_partial = partial(collate_function, pad_token_id=tokenizer.pad_token_id) - dataloader = DataLoader( - dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn_partial - ) - - return InfiniteIterator(dataloader) - - -# Model Creation Related Functions - - -class RobertaLMHeadWithMaskedPredict(RobertaLMHead): - def __init__( - self, config: RobertaConfig, embedding_weight: Optional[torch.Tensor] = None - ) -> None: - super(RobertaLMHeadWithMaskedPredict, self).__init__(config) - if embedding_weight is not None: - self.decoder.weight = embedding_weight - - def forward( # pylint: disable=arguments-differ - self, - features: torch.Tensor, - masked_token_indices: Optional[torch.Tensor] = None, - **kwargs, - ) -> 
torch.Tensor: - """The current `transformers` library does not provide support - for masked_token_indices. This function provides the support, by - running the final forward pass only for the masked indices. This saves - memory - - Args: - features (torch.Tensor): - The features to select from. Shape (batch, seq_len, h_dim) - masked_token_indices (torch.Tensor, optional): - The indices of masked tokens for index select. Defaults to None. - Shape: (num_masked_tokens,) - - Returns: - torch.Tensor: - The index selected features. Shape (num_masked_tokens, h_dim) - - """ - if masked_token_indices is not None: - features = torch.index_select( - features.view(-1, features.shape[-1]), 0, masked_token_indices - ) - return super().forward(features) - - -class RobertaMLMModel(RobertaPreTrainedModel): - def __init__(self, config: RobertaConfig, encoder: RobertaModel) -> None: - super().__init__(config) - self.encoder = encoder - self.lm_head = RobertaLMHeadWithMaskedPredict( - config, self.encoder.embeddings.word_embeddings.weight - ) - self.lm_head.apply(self._init_weights) - - def forward( - self, - src_tokens: torch.Tensor, - attention_mask: torch.Tensor, - tgt_tokens: torch.Tensor, - ) -> torch.Tensor: - """The forward pass for the MLM task - - Args: - src_tokens (torch.Tensor): - The masked token indices. Shape: (batch, seq_len) - attention_mask (torch.Tensor): - The attention mask, since the batches are padded - to the largest sequence. 
Shape: (batch, seq_len) - tgt_tokens (torch.Tensor): - The output tokens (padded with `config.pad_token_id`) - - Returns: - torch.Tensor: - The MLM loss - """ - # shape: (batch, seq_len, h_dim) - sequence_output, *_ = self.encoder( - input_ids=src_tokens, attention_mask=attention_mask, return_dict=False - ) - - pad_token_id = self.config.pad_token_id - # (labels have also been padded with pad_token_id) - # filter out all masked labels - # shape: (num_masked_tokens,) - masked_token_indexes = torch.nonzero( - (tgt_tokens != pad_token_id).view(-1) - ).view(-1) - # shape: (num_masked_tokens, vocab_size) - prediction_scores = self.lm_head(sequence_output, masked_token_indexes) - # shape: (num_masked_tokens,) - target = torch.index_select(tgt_tokens.view(-1), 0, masked_token_indexes) - - loss_fct = nn.CrossEntropyLoss(ignore_index=-1) - - masked_lm_loss = loss_fct( - prediction_scores.view(-1, self.config.vocab_size), target - ) - return masked_lm_loss - - -def create_model( - num_layers: int, num_heads: int, ff_dim: int, h_dim: int, dropout: float -) -> RobertaMLMModel: - """Create a Bert model with the specified `num_heads`, `ff_dim`, - `h_dim` and `dropout` - - Args: - num_layers (int): - The number of layers - num_heads (int): - The number of attention heads - ff_dim (int): - The intermediate hidden size of - the feed forward block of the - transformer - h_dim (int): - The hidden dim of the intermediate - representations of the transformer - dropout (float): - The value of dropout to be used. 
- Note that we apply the same dropout - to both the attention layers and the - FF layers - - Returns: - RobertaMLMModel: - A Roberta model for MLM task - - """ - roberta_config_dict = { - "attention_probs_dropout_prob": dropout, - "bos_token_id": 0, - "eos_token_id": 2, - "hidden_act": "gelu", - "hidden_dropout_prob": dropout, - "hidden_size": h_dim, - "initializer_range": 0.02, - "intermediate_size": ff_dim, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 514, - "model_type": "roberta", - "num_attention_heads": num_heads, - "num_hidden_layers": num_layers, - "pad_token_id": 1, - "type_vocab_size": 1, - "vocab_size": 50265, - } - roberta_config = RobertaConfig.from_dict(roberta_config_dict) - roberta_encoder = RobertaModel(roberta_config) - roberta_model = RobertaMLMModel(roberta_config, roberta_encoder) - return roberta_model - - -# Experiment Management Related Functions - - -def get_unique_identifier(length: int = 8) -> str: - """Create a unique identifier by choosing `length` - random characters from list of ascii characters and numbers - """ - alphabet = string.ascii_lowercase + string.digits - uuid = "".join(alphabet[ix] for ix in np.random.choice(len(alphabet), length)) - return uuid - - -def create_experiment_dir( - checkpoint_dir: pathlib.Path, all_arguments: Dict[str, Any] -) -> pathlib.Path: - """Create an experiment directory and save all arguments in it. - Additionally, also store the githash and gitdiff. Finally create - a directory for `Tensorboard` logs. 
The structure would look something - like - checkpoint_dir - `-experiment-name - |- hparams.json - |- githash.log - |- gitdiff.log - `- tb_dir/ - - Args: - checkpoint_dir (pathlib.Path): - The base checkpoint directory - all_arguments (Dict[str, Any]): - The arguments to save - - Returns: - pathlib.Path: The experiment directory - """ - # experiment name follows the following convention - # {exp_type}.{YYYY}.{MM}.{DD}.{HH}.{MM}.{SS}.{uuid} - current_time = datetime.datetime.now(pytz.timezone("US/Pacific")) - expname = "bert_pretrain.{0}.{1}.{2}.{3}.{4}.{5}.{6}".format( - current_time.year, - current_time.month, - current_time.day, - current_time.hour, - current_time.minute, - current_time.second, - get_unique_identifier(), - ) - exp_dir = checkpoint_dir / expname - if not is_rank_0(): - return exp_dir - exp_dir.mkdir(exist_ok=False) - hparams_file = exp_dir / "hparams.json" - with hparams_file.open("w") as handle: - json.dump(obj=all_arguments, fp=handle, indent=2) - # Save the git hash - try: - gitlog = sh.git.log("-1", format="%H", _tty_out=False, _fg=False) - with (exp_dir / "githash.log").open("w") as handle: - handle.write(gitlog.stdout.decode("utf-8")) - except sh.ErrorReturnCode_128: - log_dist( - "Seems like the code is not running from" - " within a git repo, so hash will" - " not be stored. However, it" - " is strongly advised to use" - " version control.", - ranks=[0], - level=logging.INFO, - ) - # And the git diff - try: - gitdiff = sh.git.diff(_fg=False, _tty_out=False) - with (exp_dir / "gitdiff.log").open("w") as handle: - handle.write(gitdiff.stdout.decode("utf-8")) - except sh.ErrorReturnCode_129: - log_dist( - "Seems like the code is not running from" - " within a git repo, so diff will" - " not be stored. 
However, it" - " is strongly advised to use" - " version control.", - ranks=[0], - level=logging.INFO, - ) - # Finally create the Tensorboard Dir - tb_dir = exp_dir / "tb_dir" - tb_dir.mkdir(exist_ok=False) - return exp_dir - - -# Checkpoint Related Functions - - -def load_model_checkpoint( - load_checkpoint_dir: pathlib.Path, - model: torch.nn.Module, - optimizer: torch.optim.Optimizer, -) -> Tuple[int, torch.nn.Module, torch.optim.Optimizer]: - """Loads the optimizer state dict and model state dict from the load_checkpoint_dir - into the passed model and optimizer. Searches for the most recent checkpoint to - load from - - Args: - load_checkpoint_dir (pathlib.Path): - The base checkpoint directory to load from - model (torch.nn.Module): - The model to load the checkpoint weights into - optimizer (torch.optim.Optimizer): - The optimizer to load the checkpoint weigths into - - Returns: - Tuple[int, torch.nn.Module, torch.optim.Optimizer]: - The checkpoint step, model with state_dict loaded and - optimizer with state_dict loaded - - """ - log_dist( - f"Loading model and optimizer checkpoint from {load_checkpoint_dir}", - ranks=[0], - level=logging.INFO, - ) - checkpoint_files = list( - filter( - lambda path: re.search(r"iter_(?P\d+)\.pt", path.name) is not None, - load_checkpoint_dir.glob("*.pt"), - ) - ) - assert len(checkpoint_files) > 0, "No checkpoints found in directory" - checkpoint_files = sorted( - checkpoint_files, - key=lambda path: int( - re.search(r"iter_(?P\d+)\.pt", path.name).group("iter_no") - ), - ) - latest_checkpoint_path = checkpoint_files[-1] - checkpoint_step = int( - re.search(r"iter_(?P\d+)\.pt", latest_checkpoint_path.name).group( - "iter_no" - ) - ) - - state_dict = torch.load(latest_checkpoint_path) - model.load_state_dict(state_dict["model"], strict=True) - optimizer.load_state_dict(state_dict["optimizer"]) - log_dist( - f"Loading model and optimizer checkpoints done. 
Loaded from {latest_checkpoint_path}", - ranks=[0], - level=logging.INFO, - ) - return checkpoint_step, model, optimizer - - -# Driver Functions - - -def train( - checkpoint_dir: str = None, - load_checkpoint_dir: str = None, - # Dataset Parameters - mask_prob: float = 0.15, - random_replace_prob: float = 0.1, - unmask_replace_prob: float = 0.1, - max_seq_length: int = 512, - tokenizer: str = "roberta-base", - # Model Parameters - num_layers: int = 6, - num_heads: int = 8, - ff_dim: int = 512, - h_dim: int = 256, - dropout: float = 0.1, - # Training Parameters - batch_size: int = 8, - num_iterations: int = 10000, - checkpoint_every: int = 1000, - log_every: int = 10, - local_rank: int = -1, -) -> pathlib.Path: - """Trains a [Bert style](https://arxiv.org/pdf/1810.04805.pdf) - (transformer encoder only) model for MLM Task - - Args: - checkpoint_dir (str): - The base experiment directory to save experiments to - mask_prob (float, optional): - The fraction of tokens to mask. Defaults to 0.15. - random_replace_prob (float, optional): - The fraction of masked tokens to replace with random token. - Defaults to 0.1. - unmask_replace_prob (float, optional): - The fraction of masked tokens to leave unchanged. - Defaults to 0.1. - max_seq_length (int, optional): - The maximum sequence length of the examples. Defaults to 512. - tokenizer (str, optional): - The tokenizer to use. Defaults to "roberta-base". - num_layers (int, optional): - The number of layers in the Bert model. Defaults to 6. - num_heads (int, optional): - Number of attention heads to use. Defaults to 8. - ff_dim (int, optional): - Size of the intermediate dimension in the FF layer. - Defaults to 512. - h_dim (int, optional): - Size of intermediate representations. - Defaults to 256. - dropout (float, optional): - Amout of Dropout to use. Defaults to 0.1. - batch_size (int, optional): - The minibatch size. Defaults to 8. - num_iterations (int, optional): - Total number of iterations to run the model for. 
- Defaults to 10000. - checkpoint_every (int, optional): - Save checkpoint after these many steps. - - ..note :: - - You want this to be frequent enough that you can - resume training in case it crashes, but not so much - that you fill up your entire storage ! - - Defaults to 1000. - log_every (int, optional): - Print logs after these many steps. Defaults to 10. - local_rank (int, optional): - Which GPU to run on (-1 for CPU). Defaults to -1. - - Returns: - pathlib.Path: The final experiment directory - - """ - local_rank = int(os.getenv("LOCAL_RANK", "-1")) - device = ( - torch.device("cuda", local_rank) - if (local_rank > -1) and torch.cuda.is_available() - else torch.device("cpu") - ) - # Create Exp. Dir - if checkpoint_dir is None and load_checkpoint_dir is None: - log_dist( - "Need to specify one of checkpoint_dir" " or load_checkpoint_dir", - ranks=[0], - level=logging.ERROR, - ) - return - if checkpoint_dir is not None and load_checkpoint_dir is not None: - log_dist( - "Cannot specify both checkpoint_dir" " and load_checkpoint_dir", - ranks=[0], - level=logging.ERROR, - ) - return - if checkpoint_dir: - log_dist("Creating Experiment Directory", ranks=[0], level=logging.INFO) - checkpoint_dir = pathlib.Path(checkpoint_dir) - checkpoint_dir.mkdir(exist_ok=True) - all_arguments = { - # Dataset Params - "mask_prob": mask_prob, - "random_replace_prob": random_replace_prob, - "unmask_replace_prob": unmask_replace_prob, - "max_seq_length": max_seq_length, - "tokenizer": tokenizer, - # Model Params - "num_layers": num_layers, - "num_heads": num_heads, - "ff_dim": ff_dim, - "h_dim": h_dim, - "dropout": dropout, - # Training Params - "batch_size": batch_size, - "num_iterations": num_iterations, - "checkpoint_every": checkpoint_every, - } - exp_dir = create_experiment_dir(checkpoint_dir, all_arguments) - log_dist( - f"Experiment Directory created at {exp_dir}", ranks=[0], level=logging.INFO - ) - else: - log_dist("Loading from Experiment Directory", ranks=[0], 
level=logging.INFO) - load_checkpoint_dir = pathlib.Path(load_checkpoint_dir) - assert load_checkpoint_dir.exists() - with (load_checkpoint_dir / "hparams.json").open("r") as handle: - hparams = json.load(handle) - # Set the hparams - # Dataset Params - mask_prob = hparams.get("mask_prob", mask_prob) - tokenizer = hparams.get("tokenizer", tokenizer) - random_replace_prob = hparams.get("random_replace_prob", random_replace_prob) - unmask_replace_prob = hparams.get("unmask_replace_prob", unmask_replace_prob) - max_seq_length = hparams.get("max_seq_length", max_seq_length) - # Model Params - ff_dim = hparams.get("ff_dim", ff_dim) - h_dim = hparams.get("h_dim", h_dim) - dropout = hparams.get("dropout", dropout) - num_layers = hparams.get("num_layers", num_layers) - num_heads = hparams.get("num_heads", num_heads) - # Training Params - batch_size = hparams.get("batch_size", batch_size) - _num_iterations = hparams.get("num_iterations", num_iterations) - num_iterations = max(num_iterations, _num_iterations) - checkpoint_every = hparams.get("checkpoint_every", checkpoint_every) - exp_dir = load_checkpoint_dir - # Tensorboard writer - if is_rank_0(): - tb_dir = exp_dir / "tb_dir" - assert tb_dir.exists() - summary_writer = SummaryWriter(log_dir=tb_dir) - # Create Datasets - log_dist("Creating Datasets", ranks=[0], level=logging.INFO) - data_iterator = create_data_iterator( - mask_prob=mask_prob, - random_replace_prob=random_replace_prob, - unmask_replace_prob=unmask_replace_prob, - tokenizer=tokenizer, - max_seq_length=max_seq_length, - batch_size=batch_size, - ) - log_dist("Dataset Creation Done", ranks=[0], level=logging.INFO) - # Create Model - log_dist("Creating Model", ranks=[0], level=logging.INFO) - model = create_model( - num_layers=num_layers, - num_heads=num_heads, - ff_dim=ff_dim, - h_dim=h_dim, - dropout=dropout, - ) - log_dist("Model Creation Done", ranks=[0], level=logging.INFO) - # DeepSpeed engine - log_dist("Creating DeepSpeed engine", ranks=[0], 
level=logging.INFO) - ds_config = { - "train_micro_batch_size_per_gpu": batch_size, - "optimizer": {"type": "Adam", "params": {"lr": 1e-4}}, - "fp16": {"enabled": True}, - "zero_optimization": {"stage": 1, "offload_optimizer": {"device": "cpu"}}, - } - model, _, _, _ = deepspeed.initialize( - model=model, model_parameters=model.parameters(), config=ds_config - ) - log_dist("DeepSpeed engine created", ranks=[0], level=logging.INFO) - # Load Model checkpoint - start_step = 1 - if load_checkpoint_dir is not None: - _, client_state = model.load_checkpoint(load_dir=load_checkpoint_dir) - checkpoint_step = client_state["checkpoint_step"] - start_step = checkpoint_step + 1 - - # The Training Loop - log_dist( - f"Total number of model parameters: {sum([p.numel() for p in model.parameters()]):,d}", - ranks=[0], - level=logging.INFO, - ) - model.train() - losses = [] - for step, batch in enumerate(data_iterator, start=start_step): - if step >= num_iterations: - break - # Move the tensors to device - for key, value in batch.items(): - batch[key] = value.to(device) - # Forward pass - loss = model(**batch) - # Backward pass - model.backward(loss) - # Optimizer Step - model.step() - losses.append(loss.item()) - if step % log_every == 0: - log_dist( - "Loss: {0:.4f}".format(np.mean(losses)), ranks=[0], level=logging.INFO - ) - if is_rank_0(): - summary_writer.add_scalar("Train/loss", np.mean(losses), step) - if step % checkpoint_every == 0: - model.save_checkpoint( - save_dir=exp_dir, client_state={"checkpoint_step": step} - ) - log_dist( - "Saved model to {0}".format(exp_dir), ranks=[0], level=logging.INFO - ) - # Save the last checkpoint if not saved yet - if step % checkpoint_every != 0: - model.save_checkpoint(save_dir=exp_dir, client_state={"checkpoint_step": step}) - log_dist("Saved model to {0}".format(exp_dir), ranks=[0], level=logging.INFO) - - return exp_dir - - -if __name__ == "__main__": - torch.manual_seed(42) - np.random.seed(0) - random.seed(0) - fire.Fire(train) 
diff --git a/examples/pytorch/elastic/echo/Dockerfile b/examples/pytorch/elastic/echo/Dockerfile deleted file mode 100644 index 1a663d1b45..0000000000 --- a/examples/pytorch/elastic/echo/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM python:3.8-buster -WORKDIR /workspace -RUN pip install torch==1.13.1 numpy -# TODO Replace this with the PIP version when available -ADD examples/pytorch/elastic/echo/echo.py echo.py -ENV PYTHONPATH /workspace -ENV ALLOW_NONE_AUTHENTICATION yes -ENTRYPOINT ["python", "-m", "torch.distributed.run"] diff --git a/examples/pytorch/elastic/echo/echo.py b/examples/pytorch/elastic/echo/echo.py deleted file mode 100644 index d72e154496..0000000000 --- a/examples/pytorch/elastic/echo/echo.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 -import io -import os -import pprint -import sys - -import torch.distributed as dist - -if __name__ == "__main__": - env_dict = { - k: os.environ[k] - for k in ( - "LOCAL_RANK", - "RANK", - "GROUP_RANK", - "WORLD_SIZE", - "MASTER_ADDR", - "MASTER_PORT", - "TORCHELASTIC_RESTART_COUNT", - "TORCHELASTIC_MAX_RESTARTS", - ) - } - - with io.StringIO() as buff: - print("======================================================", file=buff) - print( - f"Environment variables set by the agent on PID {os.getpid()}:", file=buff - ) - pprint.pprint(env_dict, stream=buff) - print("======================================================", file=buff) - print(buff.getvalue()) - sys.stdout.flush() - - dist.init_process_group(backend="gloo") - dist.barrier() - - print( - ( - f"On PID {os.getpid()}, after init process group, " - f"rank={dist.get_rank()}, world_size = {dist.get_world_size()}\n" - ) - ) diff --git a/examples/pytorch/elastic/echo/echo.yaml b/examples/pytorch/elastic/echo/echo.yaml deleted file mode 100644 index ee51ec8418..0000000000 --- a/examples/pytorch/elastic/echo/echo.yaml +++ /dev/null @@ -1,28 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: PyTorchJob -metadata: - name: elastic-example-echo -spec: - 
elasticPolicy: - rdzvBackend: c10d - minReplicas: 1 - maxReplicas: 2 - maxRestarts: 100 - pytorchReplicaSpecs: - Worker: - replicas: 2 - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-elastic-example-echo:latest - imagePullPolicy: IfNotPresent - env: - - name: LOGLEVEL - value: DEBUG - command: - - python - - -m - - torch.distributed.run - - --rdzv_backend=c10d - - ./echo.py diff --git a/examples/pytorch/elastic/etcd.yaml b/examples/pytorch/elastic/etcd.yaml deleted file mode 100644 index edb3bb1e9d..0000000000 --- a/examples/pytorch/elastic/etcd.yaml +++ /dev/null @@ -1,74 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: etcd-client -spec: - ports: - - name: etcd-client-port - port: 2379 - protocol: TCP - targetPort: 2379 - selector: - app: etcd - ---- - -apiVersion: v1 -kind: Pod -metadata: - labels: - app: etcd - etcd_node: etcd-server - name: etcd-server -spec: - containers: - - command: - - /usr/local/bin/etcd - - --data-dir - - /var/lib/etcd - - --enable-v2 - - --name - - etcd-server - - --initial-advertise-peer-urls - - http://etcd-server:2380 - - --listen-peer-urls - - http://0.0.0.0:2380 - - --listen-client-urls - - http://0.0.0.0:2379 - - --advertise-client-urls - - http://etcd-server:2379 - - --initial-cluster - - etcd-server=http://etcd-server:2380 - - --initial-cluster-state - - new - image: quay.io/coreos/etcd:latest - name: etcd-server - ports: - - containerPort: 2379 - name: client - protocol: TCP - - containerPort: 2380 - name: server - protocol: TCP - restartPolicy: Always - ---- - -apiVersion: v1 -kind: Service -metadata: - labels: - etcd_node: etcd-server - name: etcd-server -spec: - ports: - - name: client - port: 2379 - protocol: TCP - targetPort: 2379 - - name: server - port: 2380 - protocol: TCP - targetPort: 2380 - selector: - etcd_node: etcd-server diff --git a/examples/pytorch/elastic/imagenet/.dockerignore b/examples/pytorch/elastic/imagenet/.dockerignore deleted file mode 100644 index 
1269488f7f..0000000000 --- a/examples/pytorch/elastic/imagenet/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -data diff --git a/examples/pytorch/elastic/imagenet/Dockerfile b/examples/pytorch/elastic/imagenet/Dockerfile deleted file mode 100644 index 01a7aae532..0000000000 --- a/examples/pytorch/elastic/imagenet/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux_arm64 platforms. -# PyTorch=2.2.0, cuda=12.3.2 -# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-01.html#rel-24-01 -ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.01-py3 -FROM $BASE_IMAGE - -WORKDIR /workspace - -# download imagenet tiny for data -RUN apt-get -q update && apt-get -q install -y wget unzip -RUN wget -q http://cs231n.stanford.edu/tiny-imagenet-200.zip && unzip -q tiny-imagenet-200.zip -d data && rm tiny-imagenet-200.zip - -# install dependent library -RUN pip install --upgrade pip && pip install python-etcd - -COPY examples/pytorch/elastic/imagenet/ ./examples - -USER root -ENTRYPOINT ["python", "-m", "torch.distributed.run"] -CMD ["--help"] diff --git a/examples/pytorch/elastic/imagenet/imagenet.py b/examples/pytorch/elastic/imagenet/imagenet.py deleted file mode 100644 index ee540ae10f..0000000000 --- a/examples/pytorch/elastic/imagenet/imagenet.py +++ /dev/null @@ -1,578 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -r""" -Source: `pytorch imagenet example `_ # noqa B950 - -Modified and simplified to make the original pytorch example compatible with -torchelastic.distributed.launch. - -Changes: - -1. Removed ``rank``, ``gpu``, ``multiprocessing-distributed``, ``dist_url`` options. - These are obsolete parameters when using ``torchelastic.distributed.launch``. - -2. 
Removed ``seed``, ``evaluate``, ``pretrained`` options for simplicity. - -3. Removed ``resume``, ``start-epoch`` options. - Loads the most recent checkpoint by default. - -4. ``batch-size`` is now per GPU (worker) batch size rather than for all GPUs. - -5. Defaults ``workers`` (num data loader workers) to ``0``. - -Usage - -:: - - >>> python -m torchelastic.distributed.launch - --nnodes=$NUM_NODES - --nproc_per_node=$WORKERS_PER_NODE - --rdzv_id=$JOB_ID - --rdzv_backend=etcd - --rdzv_endpoint=$ETCD_HOST:$ETCD_PORT - main.py - --arch resnet18 - --epochs 20 - --batch-size 32 - -""" - -import argparse -import io -import os -import shutil -import time -from contextlib import contextmanager -from datetime import timedelta -from typing import List, Tuple - -import numpy -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.parallel -import torch.optim -import torch.utils.data -import torch.utils.data.distributed -import torchvision.datasets as datasets -import torchvision.models as models -import torchvision.transforms as transforms -from torch.distributed.elastic.multiprocessing.errors import record -from torch.distributed.elastic.utils.data import ElasticDistributedSampler -from torch.nn.parallel import DistributedDataParallel -from torch.optim import SGD -from torch.utils.data import DataLoader - -model_names = sorted( - name - for name in models.__dict__ - if name.islower() and not name.startswith("__") and callable(models.__dict__[name]) -) - -parser = argparse.ArgumentParser(description="PyTorch Elastic ImageNet Training") -parser.add_argument("data", metavar="DIR", help="path to dataset") -parser.add_argument( - "-a", - "--arch", - metavar="ARCH", - default="resnet18", - choices=model_names, - help="model architecture: " + " | ".join(model_names) + " (default: resnet18)", -) -parser.add_argument( - "-j", - "--workers", - default=0, - type=int, - metavar="N", - help="number of data loading workers", -) -parser.add_argument( - 
"--epochs", default=90, type=int, metavar="N", help="number of total epochs to run" -) -parser.add_argument( - "-b", - "--batch-size", - default=32, - type=int, - metavar="N", - help="mini-batch size (default: 32), per worker (GPU)", -) -parser.add_argument( - "--lr", - "--learning-rate", - default=0.1, - type=float, - metavar="LR", - help="initial learning rate", - dest="lr", -) -parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") -parser.add_argument( - "--wd", - "--weight-decay", - default=1e-4, - type=float, - metavar="W", - help="weight decay (default: 1e-4)", - dest="weight_decay", -) -parser.add_argument( - "-p", - "--print-freq", - default=10, - type=int, - metavar="N", - help="print frequency (default: 10)", -) -parser.add_argument( - "--dist-backend", - default="gloo", - choices=["nccl", "gloo"], - type=str, - help="distributed backend", -) -parser.add_argument( - "--checkpoint-file", - default="/tmp/checkpoint.pth.tar", - type=str, - help="checkpoint file path, to load and save to", -) - - -@record -def main(): - args = parser.parse_args() - device = torch.device("cpu") - - dist.init_process_group( - backend=args.dist_backend, init_method="env://", timeout=timedelta(seconds=10) - ) - - model, criterion, optimizer = initialize_model( - args.arch, args.lr, args.momentum, args.weight_decay, device - ) - - train_loader, val_loader = initialize_data_loader( - args.data, args.batch_size, args.workers - ) - - # resume from checkpoint if one exists; - state = load_checkpoint(args.checkpoint_file, args.arch, model, optimizer) - - start_epoch = state.epoch + 1 - print(f"=> start_epoch: {start_epoch}, best_acc1: {state.best_acc1}") - - print_freq = args.print_freq - for epoch in range(start_epoch, args.epochs): - state.epoch = epoch - train_loader.batch_sampler.sampler.set_epoch(epoch) - adjust_learning_rate(optimizer, epoch, args.lr) - - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, print_freq) 
- - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, print_freq) - - # remember best acc@1 and save checkpoint - is_best = acc1 > state.best_acc1 - state.best_acc1 = max(acc1, state.best_acc1) - - save_checkpoint(state, is_best, args.checkpoint_file) - - -class State: - """ - Container for objects that we want to checkpoint. Represents the - current "state" of the worker. This object is mutable. - """ - - def __init__(self, arch, model, optimizer): - self.epoch = -1 - self.best_acc1 = 0 - self.arch = arch - self.model = model - self.optimizer = optimizer - - def capture_snapshot(self): - """ - Essentially a ``serialize()`` function, returns the state as an - object compatible with ``torch.save()``. The following should work - :: - - snapshot = state_0.capture_snapshot() - state_1.apply_snapshot(snapshot) - assert state_0 == state_1 - """ - return { - "epoch": self.epoch, - "best_acc1": self.best_acc1, - "arch": self.arch, - "state_dict": self.model.state_dict(), - "optimizer": self.optimizer.state_dict(), - } - - def apply_snapshot(self, obj): - """ - The complimentary function of ``capture_snapshot()``. Applies the - snapshot object that was returned by ``capture_snapshot()``. - This function mutates this state object. - """ - - self.epoch = obj["epoch"] - self.best_acc1 = obj["best_acc1"] - self.state_dict = obj["state_dict"] - self.model.load_state_dict(obj["state_dict"]) - self.optimizer.load_state_dict(obj["optimizer"]) - - def save(self, f): - torch.save(self.capture_snapshot(), f) - - def load(self, f): - # Map model to be loaded to specified single gpu. 
- snapshot = torch.load(f) - self.apply_snapshot(snapshot) - - -def initialize_model( - arch: str, lr: float, momentum: float, weight_decay: float, device -): - print(f"=> creating model: {arch}") - model = models.__dict__[arch]() - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. - model.to(device) - model = nn.parallel.DistributedDataParallel(model) - # define loss function (criterion) and optimizer - criterion = nn.CrossEntropyLoss() - optimizer = SGD( - model.parameters(), lr, momentum=momentum, weight_decay=weight_decay - ) - return model, criterion, optimizer - - -def initialize_data_loader( - data_dir, batch_size, num_data_workers -) -> Tuple[DataLoader, DataLoader]: - traindir = os.path.join(data_dir, "train") - valdir = os.path.join(data_dir, "val") - normalize = transforms.Normalize( - mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] - ) - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose( - [ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ] - ), - ) - train_sampler = ElasticDistributedSampler(train_dataset) - train_loader = DataLoader( - train_dataset, - batch_size=batch_size, - num_workers=num_data_workers, - # pin_memory=True, - sampler=train_sampler, - ) - val_loader = DataLoader( - datasets.ImageFolder( - valdir, - transforms.Compose( - [ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ] - ), - ), - batch_size=batch_size, - shuffle=False, - num_workers=num_data_workers, - # pin_memory=True, - ) - return train_loader, val_loader - - -def load_checkpoint( - checkpoint_file: str, - arch: str, - model: DistributedDataParallel, - optimizer, # SGD -) -> State: - """ - Loads a local checkpoint (if any). Otherwise, checks to see if any of - the neighbors have a non-zero state. 
If so, restore the state - from the rank that has the most up-to-date checkpoint. - - .. note:: when your job has access to a globally visible persistent storage - (e.g. nfs mount, S3) you can simply have all workers load - from the most recent checkpoint from such storage. Since this - example is expected to run on vanilla hosts (with no shared - storage) the checkpoints are written to local disk, hence - we have the extra logic to broadcast the checkpoint from a - surviving node. - """ - - state = State(arch, model, optimizer) - - if os.path.isfile(checkpoint_file): - print(f"=> loading checkpoint file: {checkpoint_file}") - state.load(checkpoint_file) - print(f"=> loaded checkpoint file: {checkpoint_file}") - - # logic below is unnecessary when the checkpoint is visible on all nodes! - # create a temporary cpu pg to broadcast most up-to-date checkpoint - with tmp_process_group(backend="gloo") as pg: - rank = dist.get_rank(group=pg) - - # get rank that has the largest state.epoch - epochs = torch.zeros(dist.get_world_size(), dtype=torch.int32) - epochs[rank] = state.epoch - dist.all_reduce(epochs, op=dist.ReduceOp.SUM, group=pg) - t_max_epoch, t_max_rank = torch.max(epochs, dim=0) - max_epoch = t_max_epoch.item() - max_rank = t_max_rank.item() - - # max_epoch == -1 means no one has checkpointed return base state - if max_epoch == -1: - print("=> no workers have checkpoints, starting from epoch 0") - return state - - # broadcast the state from max_rank (which has the most up-to-date state) - # pickle the snapshot, convert it into a byte-blob tensor - # then broadcast it, unpickle it and apply the snapshot - print(f"=> using checkpoint from rank: {max_rank}, max_epoch: {max_epoch}") - - with io.BytesIO() as f: - torch.save(state.capture_snapshot(), f) - raw_blob = numpy.frombuffer(f.getvalue(), dtype=numpy.uint8) - - blob_len = torch.tensor(len(raw_blob)) - dist.broadcast(blob_len, src=max_rank, group=pg) - print(f"=> checkpoint broadcast size is: {blob_len}") - - 
if rank != max_rank: - blob = torch.zeros(blob_len.item(), dtype=torch.uint8) - else: - blob = torch.as_tensor(raw_blob, dtype=torch.uint8) - - dist.broadcast(blob, src=max_rank, group=pg) - print("=> done broadcasting checkpoint") - - if rank != max_rank: - with io.BytesIO(blob.numpy()) as f: - snapshot = torch.load(f) - state.apply_snapshot(snapshot) - - # wait till everyone has loaded the checkpoint - dist.barrier(group=pg) - - print("=> done restoring from previous checkpoint") - return state - - -@contextmanager -def tmp_process_group(backend): - cpu_pg = dist.new_group(backend=backend) - try: - yield cpu_pg - finally: - dist.destroy_process_group(cpu_pg) - - -def save_checkpoint(state: State, is_best: bool, filename: str): - checkpoint_dir = os.path.dirname(filename) - os.makedirs(checkpoint_dir, exist_ok=True) - - # save to tmp, then commit by moving the file in case the job - # gets interrupted while writing the checkpoint - tmp_filename = filename + ".tmp" - torch.save(state.capture_snapshot(), tmp_filename) - os.rename(tmp_filename, filename) - print(f"=> saved checkpoint for epoch {state.epoch} at {filename}") - if is_best: - best = os.path.join(checkpoint_dir, "model_best.pth.tar") - print(f"=> best model found at epoch {state.epoch} saving to {best}") - shutil.copyfile(filename, best) - - -def train( - train_loader: DataLoader, - model: DistributedDataParallel, - criterion, # nn.CrossEntropyLoss - optimizer, # SGD, - epoch: int, - print_freq: int, -): - batch_time = AverageMeter("Time", ":6.3f") - data_time = AverageMeter("Data", ":6.3f") - losses = AverageMeter("Loss", ":.4e") - top1 = AverageMeter("Acc@1", ":6.2f") - top5 = AverageMeter("Acc@5", ":6.2f") - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, losses, top1, top5], - prefix="Epoch: [{}]".format(epoch), - ) - - # switch to train mode - model.train() - - end = time.time() - for i, (images, target) in enumerate(train_loader): - # measure data loading time - 
data_time.update(time.time() - end) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - optimizer.zero_grad() - loss.backward() - optimizer.step() - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % print_freq == 0: - progress.display(i) - - -def validate( - val_loader: DataLoader, - model: DistributedDataParallel, - criterion, # nn.CrossEntropyLoss - print_freq: int, -): - batch_time = AverageMeter("Time", ":6.3f") - losses = AverageMeter("Loss", ":.4e") - top1 = AverageMeter("Acc@1", ":6.2f") - top5 = AverageMeter("Acc@5", ":6.2f") - progress = ProgressMeter( - len(val_loader), [batch_time, losses, top1, top5], prefix="Test: " - ) - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % print_freq == 0: - progress.display(i) - - # TODO: this should also be done with the ProgressMeter - print( - " * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(top1=top1, top5=top5) - ) - - return top1.avg - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name: str, fmt: str = ":f"): - self.name = name - self.fmt = fmt - self.reset() - - def reset(self) -> None: - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 
- - def update(self, val, n=1) -> None: - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - def __str__(self): - fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - def __init__(self, num_batches: int, meters: List[AverageMeter], prefix: str = ""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch: int) -> None: - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print("\t".join(entries)) - - def _get_batch_fmtstr(self, num_batches: int) -> str: - num_digits = len(str(num_batches // 1)) - fmt = "{:" + str(num_digits) + "d}" - return "[" + fmt + "/" + fmt.format(num_batches) + "]" - - -def adjust_learning_rate(optimizer, epoch: int, lr: float) -> None: - """ - Sets the learning rate to the initial LR decayed by 10 every 30 epochs - """ - learning_rate = lr * (0.1 ** (epoch // 30)) - for param_group in optimizer.param_groups: - param_group["lr"] = learning_rate - - -def accuracy(output, target, topk=(1,)): - """ - Computes the accuracy over the k top predictions for the specified values of k - """ - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].reshape(1, -1).view(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - - -if __name__ == "__main__": - main() diff --git a/examples/pytorch/elastic/imagenet/imagenet.yaml b/examples/pytorch/elastic/imagenet/imagenet.yaml deleted file mode 100644 index b52b2c2594..0000000000 --- a/examples/pytorch/elastic/imagenet/imagenet.yaml +++ /dev/null @@ -1,43 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: PyTorchJob 
-metadata: - name: elastic-example-imagenet -spec: - elasticPolicy: - rdzvBackend: c10d - minReplicas: 1 - maxReplicas: 3 - maxRestarts: 100 - metrics: - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: 80 - pytorchReplicaSpecs: - Worker: - replicas: 2 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-elastic-example-imagenet:latest - imagePullPolicy: IfNotPresent - resources: - requests: - cpu: 4 - env: - - name: LOGLEVEL - value: DEBUG - command: - - python - - -m - - torch.distributed.run - - /workspace/examples/imagenet.py - - "--arch=resnet18" - - "--epochs=1" - - "--batch-size=32" - - "--workers=0" - - "/workspace/data/tiny-imagenet-200" diff --git a/examples/pytorch/fsdp/fine-tune-t5-with-fsdp.ipynb b/examples/pytorch/fsdp/fine-tune-t5-with-fsdp.ipynb deleted file mode 100644 index cc1fab15bc..0000000000 --- a/examples/pytorch/fsdp/fine-tune-t5-with-fsdp.ipynb +++ /dev/null @@ -1,509 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Fine-Tune T5 Model with PyTorchJob and FSDP" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This Notebook will fine-tune Text-to-Text Transfer Transformer (T5) with Wikihow dataset for text summarization using Kubeflow PyTorchJob.\n", - "\n", - "Pretrained T5 model: https://huggingface.co/google-t5/t5-base\n", - "\n", - "Wikihow dataset: https://github.com/mahnazkoupaee/WikiHow-Dataset\n", - "\n", - "This Notebook will use **4** GPUs to fine-tune T5 model on 2 Nodes. This example is based on [the official PyTorch FSDP tutorial](https://pytorch.org/tutorials/intermediate/FSDP_adavnced_tutorial.html)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## FSDP with multi-node multi-worker training" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This Notebook demonstrates multi-node, multi-worker distributed training with Fully Sharded Data Parallel (FSDP) and PyTorchJob.\n", - "\n", - "When a model is trained with FSDP, the GPU memory footprint is smaller compare to Distributed Data Parallel (DDP),\n", - "as the model parameters are sharded across GPU devices.\n", - "\n", - "This enables training of very large models that would otherwise be impossible to fit on a single GPU device.\n", - "\n", - "Check this guide to learn more about PyTorch FSDP: https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Install the required packages\n", - "\n", - "Install the Kubeflow Training Python SDK." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# TODO (andreyvelich): Use the release version of SDK.\n", - "!pip install git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create script to fine-tune T5 using FSDP\n", - "\n", - "We need to wrap our fine-tuning script in a function to create Kubeflow PyTorchJob." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def train_function(parameters):\n", - " import os\n", - " import time\n", - " import functools\n", - "\n", - " import torch\n", - " import torch.distributed as dist\n", - " from torch.utils.data.distributed import DistributedSampler\n", - " from torch.distributed.fsdp import FullyShardedDataParallel as FSDP\n", - " from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy\n", - "\n", - " from transformers import T5Tokenizer, T5ForConditionalGeneration\n", - " from transformers.models.t5.modeling_t5 import T5Block\n", - " from datasets import Dataset\n", - "\n", - " # [1] Setup PyTorch distributed and get the distributed parameters.\n", - " dist.init_process_group(\"nccl\")\n", - " local_rank = int(os.environ[\"LOCAL_RANK\"])\n", - " rank = dist.get_rank()\n", - " world_size = dist.get_world_size()\n", - "\n", - " # Local rank identifies the GPU number inside the pod.\n", - " torch.cuda.set_device(local_rank)\n", - "\n", - " print(\n", - " f\"FSDP Training for WORLD_SIZE: {world_size}, RANK: {rank}, LOCAL_RANK: {local_rank}\"\n", - " )\n", - "\n", - " # [2] Prepare the Wikihow dataset\n", - " class wikihow(torch.utils.data.Dataset):\n", - " def __init__(\n", - " self,\n", - " tokenizer,\n", - " num_samples,\n", - " input_length,\n", - " output_length,\n", - " ):\n", - "\n", - " self.dataset = Dataset.from_csv(parameters[\"DATASET_URL\"])\n", - " self.dataset = self.dataset.select(list(range(0, num_samples)))\n", - " self.input_length = input_length\n", - " self.tokenizer = tokenizer\n", - " self.output_length = output_length\n", - "\n", - " def __len__(self):\n", - " return self.dataset.shape[0]\n", - "\n", - " def clean_text(self, text):\n", - " # Dataset contains empty values.\n", - " if text is None:\n", - " return \"\"\n", - " text = text.replace(\"Example of text:\", \"\")\n", - " text = text.replace(\"Example of Summary:\", 
\"\")\n", - " text = text.replace(\"\\n\", \"\")\n", - " text = text.replace(\"``\", \"\")\n", - " text = text.replace('\"', \"\")\n", - "\n", - " return text\n", - "\n", - " def convert_to_features(self, example_batch):\n", - " # Tokenize text and headline (as pairs of inputs).\n", - " input_ = self.clean_text(example_batch[\"text\"])\n", - " target_ = self.clean_text(example_batch[\"headline\"])\n", - "\n", - " source = self.tokenizer.batch_encode_plus(\n", - " [input_],\n", - " max_length=self.input_length,\n", - " padding=\"max_length\",\n", - " truncation=True,\n", - " return_tensors=\"pt\",\n", - " )\n", - "\n", - " targets = self.tokenizer.batch_encode_plus(\n", - " [target_],\n", - " max_length=self.output_length,\n", - " padding=\"max_length\",\n", - " truncation=True,\n", - " return_tensors=\"pt\",\n", - " )\n", - "\n", - " return source, targets\n", - "\n", - " def __getitem__(self, index):\n", - " source, targets = self.convert_to_features(self.dataset[index])\n", - "\n", - " source_ids = source[\"input_ids\"].squeeze()\n", - " target_ids = targets[\"input_ids\"].squeeze()\n", - "\n", - " src_mask = source[\"attention_mask\"].squeeze()\n", - " target_mask = targets[\"attention_mask\"].squeeze()\n", - "\n", - " return {\n", - " \"source_ids\": source_ids,\n", - " \"source_mask\": src_mask,\n", - " \"target_ids\": target_ids,\n", - " \"target_mask\": target_mask,\n", - " }\n", - "\n", - " # [3] Get the T5 pre-trained model and tokenizer.\n", - " # Since this script is run by multiple workers, we should print results only for the worker with RANK=0.\n", - " if rank == 0:\n", - " print(f\"Downloading the {parameters['MODEL_NAME']} model\")\n", - "\n", - " model = T5ForConditionalGeneration.from_pretrained(parameters[\"MODEL_NAME\"])\n", - " tokenizer = T5Tokenizer.from_pretrained(parameters[\"MODEL_NAME\"])\n", - "\n", - " # [4] Download the Wikihow dataset.\n", - " if rank == 0:\n", - " print(\"Downloading the Wikihow dataset\")\n", - "\n", - " dataset = 
wikihow(tokenizer, 1500, 512, 150)\n", - " train_loader = torch.utils.data.DataLoader(\n", - " dataset,\n", - " batch_size=4,\n", - " sampler=DistributedSampler(dataset),\n", - " )\n", - "\n", - " # [5] Setup model with FSDP.\n", - " # Model is on CPU before input to FSDP.\n", - " t5_auto_wrap_policy = functools.partial(\n", - " transformer_auto_wrap_policy,\n", - " transformer_layer_cls={\n", - " T5Block,\n", - " },\n", - " )\n", - " model = FSDP(\n", - " model,\n", - " auto_wrap_policy=t5_auto_wrap_policy,\n", - " device_id=torch.cuda.current_device(),\n", - " )\n", - "\n", - " # [6] Start training.\n", - " optimizer = torch.optim.AdamW(model.parameters(), lr=0.002)\n", - " scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.7)\n", - " t0 = time.time()\n", - " if rank == 0:\n", - " print(\"Training is started...\")\n", - "\n", - " for epoch in range(1, 3):\n", - " model.train()\n", - " fsdp_loss = torch.zeros(2).to(local_rank)\n", - "\n", - " for batch in train_loader:\n", - " for key in batch.keys():\n", - " batch[key] = batch[key].to(local_rank)\n", - "\n", - " optimizer.zero_grad()\n", - "\n", - " output = model(\n", - " input_ids=batch[\"source_ids\"],\n", - " attention_mask=batch[\"source_mask\"],\n", - " labels=batch[\"target_ids\"],\n", - " )\n", - " loss = output[\"loss\"]\n", - " loss.backward()\n", - " optimizer.step()\n", - " fsdp_loss[0] += loss.item()\n", - " fsdp_loss[1] += len(batch)\n", - "\n", - " dist.all_reduce(fsdp_loss, op=dist.ReduceOp.SUM)\n", - " train_accuracy = fsdp_loss[0] / fsdp_loss[1]\n", - "\n", - " if rank == 0:\n", - " print(f\"Train Epoch: \\t{epoch}, Loss: \\t{train_accuracy:.4f}\")\n", - "\n", - " scheduler.step()\n", - "\n", - " dist.barrier()\n", - "\n", - " if rank == 0:\n", - " print(f\"FSDP training time: {int(time.time() - t0)} seconds\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create Kubeflow PyTorchJob to fine-tune T5 with FSDP\n", - "\n", - "Use 
`TrainingClient()` to create PyTorchJob which will fine-tine T5 on **2 workers** using **2 GPU** for each worker.\n", - "\n", - "If you don't have enough GPU resources, you can decrease number of workers or number of GPUs per worker." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from kubeflow.training import TrainingClient\n", - "\n", - "job_name = \"fsdp-fine-tuning\"\n", - "\n", - "parameters = {\n", - " \"DATASET_URL\": \"https://public-nlp-datasets.s3.us-west-2.amazonaws.com/wikihowAll.csv\",\n", - " \"MODEL_NAME\": \"t5-base\",\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Create the PyTorchJob.\n", - "TrainingClient().create_job(\n", - " name=job_name,\n", - " train_func=train_function,\n", - " parameters=parameters,\n", - " num_workers=2, # You can modify number of workers or number of GPUs.\n", - " num_procs_per_worker=2,\n", - " resources_per_worker={\"gpu\": 2},\n", - " packages_to_install=[\n", - " \"transformers==4.38.2\",\n", - " \"datasets==2.21.0\",\n", - " \"SentencePiece==0.2.0\",\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Check the PyTorchJob conditions\n", - "\n", - "Use `TrainingClient()` APIs to get information about created PyTorchJob." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PyTorchJob Conditions\n", - "[{'last_transition_time': datetime.datetime(2024, 10, 16, 19, 24, 17, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2024, 10, 16, 19, 24, 17, tzinfo=tzutc()),\n", - " 'message': 'PyTorchJob fsdp-fine-tuning is created.',\n", - " 'reason': 'PyTorchJobCreated',\n", - " 'status': 'True',\n", - " 'type': 'Created'}, {'last_transition_time': datetime.datetime(2024, 10, 16, 19, 24, 18, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2024, 10, 16, 19, 24, 18, tzinfo=tzutc()),\n", - " 'message': 'PyTorchJob fsdp-fine-tuning is running.',\n", - " 'reason': 'PyTorchJobRunning',\n", - " 'status': 'True',\n", - " 'type': 'Running'}]\n", - "----------------------------------------\n", - "PyTorchJob is running\n" - ] - } - ], - "source": [ - "print(\"PyTorchJob Conditions\")\n", - "print(TrainingClient().get_job_conditions(job_name))\n", - "print(\"-\" * 40)\n", - "\n", - "# Wait until PyTorchJob has the Running condition.\n", - "job = TrainingClient().wait_for_job_conditions(\n", - " job_name,\n", - " expected_conditions={\"Running\"},\n", - ")\n", - "print(\"PyTorchJob is running\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the PyTorchJob pod names\n", - "\n", - "Since we define 2 workers, PyTorchJob will create 1 master pod and 1 worker pod to run FSDP fine-tuning." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['fsdp-fine-tuning-master-0', 'fsdp-fine-tuning-worker-0']" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "TrainingClient().get_job_pod_names(job_name)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "execution": { - "iopub.status.busy": "2022-09-01T20:10:25.759950Z", - "iopub.status.idle": "2022-09-01T20:10:25.760581Z", - "shell.execute_reply": "2022-09-01T20:10:25.760353Z", - "shell.execute_reply.started": "2022-09-01T20:10:25.760328Z" - }, - "tags": [] - }, - "source": [ - "### Get the PyTorchJob training logs\n", - "\n", - "Model parameters are sharded across all workers and GPU devices." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Pod fsdp-fine-tuning-master-0]: WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", - "[Pod fsdp-fine-tuning-master-0]: [2024-10-16 19:24:47,178] torch.distributed.run: [WARNING] \n", - "[Pod fsdp-fine-tuning-master-0]: [2024-10-16 19:24:47,178] torch.distributed.run: [WARNING] *****************************************\n", - "[Pod fsdp-fine-tuning-master-0]: [2024-10-16 19:24:47,178] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
\n", - "[Pod fsdp-fine-tuning-master-0]: [2024-10-16 19:24:47,178] torch.distributed.run: [WARNING] *****************************************\n", - "[Pod fsdp-fine-tuning-master-0]: FSDP Training for WORLD_SIZE: 4, RANK: 0, LOCAL_RANK: 0\n", - "[Pod fsdp-fine-tuning-master-0]: Downloading the t5-base model\n", - "[Pod fsdp-fine-tuning-master-0]: /opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", - "[Pod fsdp-fine-tuning-master-0]: warnings.warn(\n", - "[Pod fsdp-fine-tuning-master-0]: FSDP Training for WORLD_SIZE: 4, RANK: 1, LOCAL_RANK: 1\n", - "[Pod fsdp-fine-tuning-master-0]: /opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", - "[Pod fsdp-fine-tuning-master-0]: warnings.warn(\n", - "[Pod fsdp-fine-tuning-master-0]: You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n", - "[Pod fsdp-fine-tuning-master-0]: You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. 
This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n", - "[Pod fsdp-fine-tuning-master-0]: Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "[Pod fsdp-fine-tuning-master-0]: Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "[Pod fsdp-fine-tuning-master-0]: Downloading the Wikihow dataset\n", - "Downloading data: 100%|██████████| 619M/619M [00:11<00:00, 55.0MB/s] \n", - "Generating train split: 215365 examples [00:08, 26087.53 examples/s]\n", - "[Pod fsdp-fine-tuning-master-0]: Training is started...\n", - "[Pod fsdp-fine-tuning-master-0]: Train Epoch: \t1, Loss: \t0.3802\n", - "[Pod fsdp-fine-tuning-master-0]: Train Epoch: \t2, Loss: \t0.2659\n", - "[Pod fsdp-fine-tuning-master-0]: FSDP training time: 107 seconds\n" - ] - } - ], - "source": [ - "logs, _ = TrainingClient().get_job_logs(job_name, follow=True)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "execution": { - "iopub.execute_input": "2024-03-01T23:44:15.511173Z", - "iopub.status.busy": "2024-03-01T23:44:15.510932Z", - "iopub.status.idle": "2024-03-01T23:44:15.539921Z", - "shell.execute_reply": "2024-03-01T23:44:15.539352Z", - "shell.execute_reply.started": "2024-03-01T23:44:15.511155Z" - }, - "tags": [] - }, - "source": [ - "## Delete the PyTorchJob\n", - "\n", - "You can delete the created PyTorchJob." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "TrainingClient().delete_job(name=job_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/pytorch/image-classification/Train-CNN-with-FashionMNIST.ipynb b/examples/pytorch/image-classification/Train-CNN-with-FashionMNIST.ipynb deleted file mode 100644 index 002bd8a3fd..0000000000 --- a/examples/pytorch/image-classification/Train-CNN-with-FashionMNIST.ipynb +++ /dev/null @@ -1,658 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "90d43b56-97e5-45e2-8e67-4488ed31d2df", - "metadata": { - "tags": [] - }, - "source": [ - "# Run PyTorchJob From Function\n", - "\n", - "In this Notebook we are going to create [Kubeflow PyTorchJob](https://www.kubeflow.org/docs/components/training/pytorch/).\n", - "\n", - "The PyTorchJob will run distributive training using [DistributedDataParallel strategy](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)." - ] - }, - { - "cell_type": "markdown", - "id": "a8bb6564-fde3-4c28-841c-012122643dd9", - "metadata": { - "tags": [] - }, - "source": [ - "## Install Kubeflow Python SDKs\n", - "\n", - "You need to install PyTorch packages and Kubeflow SDKs to run this Notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d49f072e-2221-48bb-9f6d-561713d1a45c", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install torch==2.1.2\n", - "!pip install torchvision==0.19.1\n", - "\n", - "# TODO (andreyvelich): Change to release version when SDK with the new APIs is published.\n", - "!pip install git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python" - ] - }, - { - "cell_type": "markdown", - "id": "e9331a05-9127-4b3a-8077-31157e267827", - "metadata": {}, - "source": [ - "## Create Train Script for CNN Model\n", - "\n", - "This is simple **Convolutional Neural Network (CNN)** model for recognizing different picture of clothing using [Fashion MNIST Dataset](https://github.com/zalandoresearch/fashion-mnist)." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "69f21f33-5c64-452c-90c4-977fc0dadb3b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def train_pytorch_model(parameters):\n", - " import logging\n", - " import os\n", - "\n", - " import torch\n", - " import torch.distributed as dist\n", - " import torch.nn.functional as F\n", - " from torch import nn\n", - " from torch.utils.data import DistributedSampler\n", - " from torchvision import datasets, transforms\n", - "\n", - " logging.basicConfig(\n", - " format=\"%(asctime)s %(levelname)-8s %(message)s\",\n", - " datefmt=\"%Y-%m-%dT%H:%M:%SZ\",\n", - " level=logging.INFO,\n", - " )\n", - "\n", - " # Create PyTorch CNN Model.\n", - " class Net(nn.Module):\n", - " def __init__(self):\n", - " super(Net, self).__init__()\n", - " self.conv1 = nn.Conv2d(1, 20, 5, 1)\n", - " self.conv2 = nn.Conv2d(20, 50, 5, 1)\n", - " self.fc1 = nn.Linear(4 * 4 * 50, 500)\n", - " self.fc2 = nn.Linear(500, 10)\n", - "\n", - " def forward(self, x):\n", - " x = F.relu(self.conv1(x))\n", - " x = F.max_pool2d(x, 2, 2)\n", - " x = F.relu(self.conv2(x))\n", - " x = F.max_pool2d(x, 2, 2)\n", - " x = x.view(-1, 4 * 4 * 50)\n", - 
" x = F.relu(self.fc1(x))\n", - " x = self.fc2(x)\n", - " return F.log_softmax(x, dim=1)\n", - "\n", - " # IF GPU is available, nccl dist backend is used. Otherwise, gloo dist backend is used.\n", - " if torch.cuda.is_available():\n", - " device = \"cuda\"\n", - " backend = \"nccl\"\n", - " else:\n", - " device = \"cpu\"\n", - " backend = \"gloo\"\n", - " \n", - " logging.info(f\"Using Device: {device}, Backend: {backend}\")\n", - "\n", - " # Setup PyTorch DDP. Distributed environment will be set automatically by Training Operator.\n", - " dist.init_process_group(backend=backend)\n", - " Distributor = torch.nn.parallel.DistributedDataParallel\n", - " local_rank = int(os.getenv(\"LOCAL_RANK\", 0))\n", - " logging.info(\n", - " \"Distributed Training for WORLD_SIZE: {}, RANK: {}, LOCAL_RANK: {}\".format(\n", - " dist.get_world_size(),\n", - " dist.get_rank(),\n", - " local_rank,\n", - " )\n", - " )\n", - "\n", - " # Attach model to the correct device.\n", - " device = torch.device(f\"{device}:{local_rank}\")\n", - " model = Net().to(device)\n", - " model = Distributor(model)\n", - " optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)\n", - "\n", - " # Get Fashion MNIST Dataset.\n", - " dataset = datasets.FashionMNIST(\n", - " \"./data\",\n", - " train=True,\n", - " download=True,\n", - " transform=transforms.Compose([transforms.ToTensor()]),\n", - " )\n", - "\n", - " # Every PyTorchJob worker gets distributed sampler of dataset.\n", - " train_loader = torch.utils.data.DataLoader(\n", - " dataset,\n", - " batch_size=128,\n", - " sampler=DistributedSampler(dataset),\n", - " )\n", - "\n", - " # Start Training.\n", - " logging.info(f\"Start training for RANK: {dist.get_rank()}. 
WORLD_SIZE: {dist.get_world_size()}\")\n", - "\n", - " for epoch in range(int(parameters[\"NUM_EPOCHS\"])):\n", - " model.train()\n", - "\n", - " for batch_idx, (data, target) in enumerate(train_loader):\n", - " # Attach tensors to the device.\n", - " data = data.to(device)\n", - " target = target.to(device)\n", - "\n", - " optimizer.zero_grad()\n", - " output = model(data)\n", - " loss = F.nll_loss(output, target)\n", - " loss.backward()\n", - " optimizer.step()\n", - " if batch_idx % 10 == 0 and dist.get_rank() == 0:\n", - " logging.info(\n", - " \"Train Epoch: {} [{}/{} ({:.0f}%)]\\tloss={:.4f}\".format(\n", - " epoch,\n", - " batch_idx * len(data),\n", - " len(train_loader.dataset),\n", - " 100.0 * batch_idx / len(train_loader),\n", - " loss.item(),\n", - " )\n", - " )\n", - " if dist.get_rank() == 0:\n", - " logging.info(\"Training is finished\")" - ] - }, - { - "cell_type": "markdown", - "id": "8cfe8739-1f94-476a-80e3-dd6e3237d9ed", - "metadata": { - "execution": { - "iopub.execute_input": "2022-09-01T19:32:37.813779Z", - "iopub.status.busy": "2022-09-01T19:32:37.812759Z", - "iopub.status.idle": "2022-09-01T19:32:37.827050Z", - "shell.execute_reply": "2022-09-01T19:32:37.825186Z", - "shell.execute_reply.started": "2022-09-01T19:32:37.813690Z" - } - }, - "source": [ - "## Run Training Locally in the Notebook\n", - "\n", - "We are going to download Fashion MNIST Dataset and start local training." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "9e2c6fd8-d0ba-4bc6-ac90-d4cf09751ace", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-10-08T13:58:29Z INFO Using Device: cpu, Backend: gloo\n", - "2024-10-08T13:58:29Z INFO Distributed Training for WORLD_SIZE: 1, RANK: 0, LOCAL_RANK: 0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 26421880/26421880 [00:02<00:00, 9155631.80it/s] \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 29515/29515 [00:00<00:00, 1364085.84it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to 
./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4422102/4422102 [00:00<00:00, 8802674.51it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 5148/5148 [00:00<00:00, 8424610.61it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "2024-10-08T13:58:33Z INFO Start training for RANK: 0. 
WORLD_SIZE: 1\n", - "2024-10-08T13:58:33Z INFO Train Epoch: 0 [0/60000 (0%)]\tloss=2.3017\n", - "2024-10-08T13:58:33Z INFO Train Epoch: 0 [1280/60000 (2%)]\tloss=2.2850\n", - "2024-10-08T13:58:34Z INFO Train Epoch: 0 [2560/60000 (4%)]\tloss=2.2844\n", - "2024-10-08T13:58:34Z INFO Train Epoch: 0 [3840/60000 (6%)]\tloss=2.2718\n", - "2024-10-08T13:58:34Z INFO Train Epoch: 0 [5120/60000 (9%)]\tloss=2.2489\n", - "2024-10-08T13:58:35Z INFO Train Epoch: 0 [6400/60000 (11%)]\tloss=2.2405\n", - "2024-10-08T13:58:35Z INFO Train Epoch: 0 [7680/60000 (13%)]\tloss=2.2178\n", - "2024-10-08T13:58:35Z INFO Train Epoch: 0 [8960/60000 (15%)]\tloss=2.1755\n", - "2024-10-08T13:58:35Z INFO Train Epoch: 0 [10240/60000 (17%)]\tloss=2.1326\n", - "2024-10-08T13:58:36Z INFO Train Epoch: 0 [11520/60000 (19%)]\tloss=2.0784\n", - "2024-10-08T13:58:36Z INFO Train Epoch: 0 [12800/60000 (21%)]\tloss=1.9585\n", - "2024-10-08T13:58:36Z INFO Train Epoch: 0 [14080/60000 (23%)]\tloss=1.8107\n", - "2024-10-08T13:58:36Z INFO Train Epoch: 0 [15360/60000 (26%)]\tloss=1.6047\n", - "2024-10-08T13:58:37Z INFO Train Epoch: 0 [16640/60000 (28%)]\tloss=1.4722\n", - "2024-10-08T13:58:37Z INFO Train Epoch: 0 [17920/60000 (30%)]\tloss=1.3473\n", - "2024-10-08T13:58:37Z INFO Train Epoch: 0 [19200/60000 (32%)]\tloss=1.2142\n", - "2024-10-08T13:58:38Z INFO Train Epoch: 0 [20480/60000 (34%)]\tloss=1.1584\n", - "2024-10-08T13:58:38Z INFO Train Epoch: 0 [21760/60000 (36%)]\tloss=1.0055\n", - "2024-10-08T13:58:38Z INFO Train Epoch: 0 [23040/60000 (38%)]\tloss=0.9729\n", - "2024-10-08T13:58:38Z INFO Train Epoch: 0 [24320/60000 (41%)]\tloss=1.0776\n", - "2024-10-08T13:58:39Z INFO Train Epoch: 0 [25600/60000 (43%)]\tloss=1.1153\n", - "2024-10-08T13:58:39Z INFO Train Epoch: 0 [26880/60000 (45%)]\tloss=0.9125\n", - "2024-10-08T13:58:39Z INFO Train Epoch: 0 [28160/60000 (47%)]\tloss=1.0451\n", - "2024-10-08T13:58:39Z INFO Train Epoch: 0 [29440/60000 (49%)]\tloss=1.0821\n", - "2024-10-08T13:58:40Z INFO Train Epoch: 0 
[30720/60000 (51%)]\tloss=0.7935\n", - "2024-10-08T13:58:40Z INFO Train Epoch: 0 [32000/60000 (53%)]\tloss=1.0418\n", - "2024-10-08T13:58:40Z INFO Train Epoch: 0 [33280/60000 (55%)]\tloss=0.8537\n", - "2024-10-08T13:58:41Z INFO Train Epoch: 0 [34560/60000 (58%)]\tloss=0.8402\n", - "2024-10-08T13:58:41Z INFO Train Epoch: 0 [35840/60000 (60%)]\tloss=0.9968\n", - "2024-10-08T13:58:41Z INFO Train Epoch: 0 [37120/60000 (62%)]\tloss=0.9956\n", - "2024-10-08T13:58:42Z INFO Train Epoch: 0 [38400/60000 (64%)]\tloss=1.0038\n", - "2024-10-08T13:58:42Z INFO Train Epoch: 0 [39680/60000 (66%)]\tloss=0.8188\n", - "2024-10-08T13:58:42Z INFO Train Epoch: 0 [40960/60000 (68%)]\tloss=1.0646\n", - "2024-10-08T13:58:42Z INFO Train Epoch: 0 [42240/60000 (70%)]\tloss=0.6951\n", - "2024-10-08T13:58:43Z INFO Train Epoch: 0 [43520/60000 (72%)]\tloss=0.9384\n", - "2024-10-08T13:58:43Z INFO Train Epoch: 0 [44800/60000 (75%)]\tloss=0.9681\n", - "2024-10-08T13:58:43Z INFO Train Epoch: 0 [46080/60000 (77%)]\tloss=0.7699\n", - "2024-10-08T13:58:44Z INFO Train Epoch: 0 [47360/60000 (79%)]\tloss=0.9631\n", - "2024-10-08T13:58:44Z INFO Train Epoch: 0 [48640/60000 (81%)]\tloss=0.9253\n", - "2024-10-08T13:58:44Z INFO Train Epoch: 0 [49920/60000 (83%)]\tloss=0.6612\n", - "2024-10-08T13:58:45Z INFO Train Epoch: 0 [51200/60000 (85%)]\tloss=0.9142\n", - "2024-10-08T13:58:45Z INFO Train Epoch: 0 [52480/60000 (87%)]\tloss=0.7794\n", - "2024-10-08T13:58:46Z INFO Train Epoch: 0 [53760/60000 (90%)]\tloss=0.8052\n", - "2024-10-08T13:58:46Z INFO Train Epoch: 0 [55040/60000 (92%)]\tloss=0.8630\n", - "2024-10-08T13:58:46Z INFO Train Epoch: 0 [56320/60000 (94%)]\tloss=0.7686\n", - "2024-10-08T13:58:47Z INFO Train Epoch: 0 [57600/60000 (96%)]\tloss=0.7940\n", - "2024-10-08T13:58:47Z INFO Train Epoch: 0 [58880/60000 (98%)]\tloss=0.6830\n", - "2024-10-08T13:58:47Z INFO Training is finished\n" - ] - } - ], - "source": [ - "# Set dist env variables to run the above training locally on the Notebook.\n", - "import os\n", 
- "\n", - "os.environ[\"RANK\"] = \"0\"\n", - "os.environ[\"LOCAL_RANK\"] = \"0\"\n", - "os.environ[\"WORLD_SIZE\"] = \"1\"\n", - "os.environ[\"MASTER_ADDR\"] = \"localhost\"\n", - "os.environ[\"MASTER_PORT\"] = \"1234\"\n", - "\n", - "# Train Model locally in the Notebook.\n", - "train_pytorch_model({\"NUM_EPOCHS\": \"1\"})" - ] - }, - { - "cell_type": "markdown", - "id": "5aae47e3-be31-468e-8f38-89e1e2f1c764", - "metadata": { - "tags": [] - }, - "source": [ - "## Start Distributive Training with PyTorchJob\n", - "\n", - "Before creating PyTorchJob, you have to create `TrainingClient()`. It uses [Kubernetes Python client](https://github.com/kubernetes-client/python) to communicate with Kubernetes API server. You can set path and context for [the kubeconfig file](https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/). The default location for the kubeconfig is `~/.kube/config`.\n", - "\n", - "Kubeflow Training Operator automatically set the appropriate env variables (`MASTER_PORT`, `MASTER_ADDR`, `WORLD_SIZE`, `RANK`) for each PyTorchJob container.\n", - "\n", - "PyTorchJob will train model on 3 epochs with 3 PyTorch workers." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "eb1acd34-ebcf-409b-8bb3-0225cee37110", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from kubeflow.training import TrainingClient, constants\n", - "\n", - "# Start PyTorchJob Training.\n", - "pytorchjob_name = \"train-pytorch\"\n", - "\n", - "# Since we set `job_kind = PyTorchJob` APIs are going to use PyTorchJob as a default Job kind.\n", - "training_client = TrainingClient(job_kind=constants.PYTORCHJOB_KIND)\n", - "\n", - "training_client.create_job(\n", - " name=pytorchjob_name,\n", - " train_func=train_pytorch_model,\n", - " parameters={\"NUM_EPOCHS\": \"3\"}, # Input parameters for the train function.\n", - " num_workers=2, # How many PyTorch Nodes will be created.\n", - " num_procs_per_worker=2, # How many procs per node will be used (e.g. number of CPUs/GPUs in a single Node)\n", - " resources_per_worker={\"cpu\": \"2\"}\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "e44c3ad7-62c4-4b58-b52a-15fd8746b772", - "metadata": {}, - "source": [ - "### Check the PyTorchJob Status\n", - "\n", - "Use `TrainingClient()` APIs to get information about created PyTorchJob." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "4141f6c2-c38f-4972-b68a-35d150ef7485", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PyTorchJob Status: False\n" - ] - } - ], - "source": [ - "print(f\"PyTorchJob Status: {training_client.is_job_running(name=pytorchjob_name)}\")" - ] - }, - { - "cell_type": "markdown", - "id": "42e10587-7ac2-45bf-9c4f-d418e1585974", - "metadata": {}, - "source": [ - "### Get PyTorchJob Pod Names\n", - "\n", - "Since we used 3 workers, PyTorchJob will create 1 master pod and 2 worker pods to execute distributed training." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "49b53308-a19b-45e8-942f-4333e727ee48", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['train-pytorch-master-0', 'train-pytorch-worker-0']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "training_client.get_job_pod_names(pytorchjob_name)" - ] - }, - { - "cell_type": "markdown", - "id": "b91d332d-487c-4a95-937d-26ffb6199cda", - "metadata": { - "execution": { - "iopub.status.busy": "2022-09-01T20:10:25.759950Z", - "iopub.status.idle": "2022-09-01T20:10:25.760581Z", - "shell.execute_reply": "2022-09-01T20:10:25.760353Z", - "shell.execute_reply.started": "2022-09-01T20:10:25.760328Z" - }, - "tags": [] - }, - "source": [ - "### Get PyTorchJob Training Logs\n", - "\n", - "We can get the logs from the master pod.\n", - "\n", - "Every worker processes 20000 data samples on each epoch since we distribute 60000 samples across 3 workers." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5232d542-d4bf-4c51-8b11-ad0534fb0b9d", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2024-10-08 13:25:08,740] torch.distributed.run: [WARNING] master_addr is only used for static rdzv_backend and when rdzv_endpoint is not specified.\n", - "[2024-10-08 13:25:08,741] torch.distributed.run: [WARNING] \n", - "[2024-10-08 13:25:08,741] torch.distributed.run: [WARNING] *****************************************\n", - "[2024-10-08 13:25:08,741] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
\n", - "[2024-10-08 13:25:08,741] torch.distributed.run: [WARNING] *****************************************\n", - "[W socket.cpp:663] [c10d] The IPv6 network addresses of (train-pytorch-worker-0, 23456) cannot be retrieved (gai error: -2 - Name or service not known).\n", - "[W socket.cpp:663] [c10d] The IPv6 network addresses of (train-pytorch-worker-0, 23456) cannot be retrieved (gai error: -2 - Name or service not known).\n", - "[W socket.cpp:663] [c10d] The IPv6 network addresses of (train-pytorch-worker-0, 23456) cannot be retrieved (gai error: -2 - Name or service not known).\n", - "[W socket.cpp:663] [c10d] The IPv6 network addresses of (train-pytorch-worker-0, 23456) cannot be retrieved (gai error: -2 - Name or service not known).\n", - "[W socket.cpp:663] [c10d] The IPv6 network addresses of (train-pytorch-worker-0, 23456) cannot be retrieved (gai error: -2 - Name or service not known).\n", - "2024-10-08T13:25:15Z INFO Using Device: cpu, Backend: gloo\n", - "2024-10-08T13:25:15Z INFO Using Device: cpu, Backend: gloo\n", - "2024-10-08T13:25:16Z INFO Distributed Training for WORLD_SIZE: 4, RANK: 0, LOCAL_RANK: 0\n", - "2024-10-08T13:25:16Z INFO Distributed Training for WORLD_SIZE: 4, RANK: 1, LOCAL_RANK: 1\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz\n", - "100%|██████████| 26421880/26421880 [00:02<00:00, 12700502.50it/s]\n", - "100%|██████████| 26421880/26421880 [00:02<00:00, 12593356.31it/s]\n", - "Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to 
./data/FashionMNIST/raw\n", - "Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz\n", - "\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz\n", - "100%|██████████| 29515/29515 [00:00<00:00, 212712.93it/s]\n", - "Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz\n", - "100%|██████████| 29515/29515 [00:00<00:00, 212353.88it/s]\n", - "Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz\n", - "100%|██████████| 4422102/4422102 [00:05<00:00, 744014.92it/s] \n", - "Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz\n", - "100%|██████████| 5148/5148 
[00:00<00:00, 48197046.86it/s]t/s]\n", - "Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "2024-10-08T13:25:27Z INFO Start training for RANK: 0. WORLD_SIZE: 4\n", - "100%|██████████| 4422102/4422102 [00:07<00:00, 581699.71it/s] \n", - "Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz\n", - "Using downloaded and verified file: ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz\n", - "Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw\n", - "\n", - "2024-10-08T13:25:28Z INFO Start training for RANK: 1. WORLD_SIZE: 4\n", - "2024-10-08T13:25:29Z INFO Train Epoch: 0 [0/60000 (0%)]\tloss=2.3060\n", - "2024-10-08T13:25:29Z INFO Train Epoch: 0 [1280/60000 (8%)]\tloss=2.2977\n", - "2024-10-08T13:25:30Z INFO Train Epoch: 0 [2560/60000 (17%)]\tloss=2.2900\n", - "2024-10-08T13:25:31Z INFO Train Epoch: 0 [3840/60000 (25%)]\tloss=2.2840\n", - "2024-10-08T13:25:31Z INFO Train Epoch: 0 [5120/60000 (34%)]\tloss=2.2755\n", - "2024-10-08T13:25:32Z INFO Train Epoch: 0 [6400/60000 (42%)]\tloss=2.2699\n", - "2024-10-08T13:25:33Z INFO Train Epoch: 0 [7680/60000 (51%)]\tloss=2.2518\n", - "2024-10-08T13:25:34Z INFO Train Epoch: 0 [8960/60000 (59%)]\tloss=2.2496\n", - "2024-10-08T13:25:34Z INFO Train Epoch: 0 [10240/60000 (68%)]\tloss=2.2273\n", - "2024-10-08T13:25:35Z INFO Train Epoch: 0 [11520/60000 (76%)]\tloss=2.1986\n", - "2024-10-08T13:25:36Z INFO Train Epoch: 0 [12800/60000 (85%)]\tloss=2.1755\n", - "2024-10-08T13:25:37Z INFO Train Epoch: 0 [14080/60000 (93%)]\tloss=2.1046\n", - "2024-10-08T13:25:37Z INFO Train Epoch: 1 [0/60000 (0%)]\tloss=2.0498\n", - "2024-10-08T13:25:38Z INFO Train Epoch: 1 [1280/60000 (8%)]\tloss=1.9492\n", - "2024-10-08T13:25:39Z INFO Train Epoch: 1 [2560/60000 (17%)]\tloss=1.8004\n", - "2024-10-08T13:25:39Z INFO Train 
Epoch: 1 [3840/60000 (25%)]\tloss=1.6088\n", - "2024-10-08T13:25:40Z INFO Train Epoch: 1 [5120/60000 (34%)]\tloss=1.3772\n", - "2024-10-08T13:25:41Z INFO Train Epoch: 1 [6400/60000 (42%)]\tloss=1.2621\n", - "2024-10-08T13:25:41Z INFO Train Epoch: 1 [7680/60000 (51%)]\tloss=1.1353\n", - "2024-10-08T13:25:42Z INFO Train Epoch: 1 [8960/60000 (59%)]\tloss=1.0971\n", - "2024-10-08T13:25:43Z INFO Train Epoch: 1 [10240/60000 (68%)]\tloss=1.0772\n", - "2024-10-08T13:25:44Z INFO Train Epoch: 1 [11520/60000 (76%)]\tloss=1.0657\n", - "2024-10-08T13:25:44Z INFO Train Epoch: 1 [12800/60000 (85%)]\tloss=1.0127\n", - "2024-10-08T13:25:45Z INFO Train Epoch: 1 [14080/60000 (93%)]\tloss=0.9161\n", - "2024-10-08T13:25:46Z INFO Train Epoch: 2 [0/60000 (0%)]\tloss=1.3036\n", - "2024-10-08T13:25:46Z INFO Train Epoch: 2 [1280/60000 (8%)]\tloss=0.8902\n", - "2024-10-08T13:25:47Z INFO Train Epoch: 2 [2560/60000 (17%)]\tloss=0.9369\n", - "2024-10-08T13:25:48Z INFO Train Epoch: 2 [3840/60000 (25%)]\tloss=0.9562\n", - "2024-10-08T13:25:49Z INFO Train Epoch: 2 [5120/60000 (34%)]\tloss=0.8001\n", - "2024-10-08T13:25:49Z INFO Train Epoch: 2 [6400/60000 (42%)]\tloss=0.8546\n", - "2024-10-08T13:25:50Z INFO Train Epoch: 2 [7680/60000 (51%)]\tloss=0.8226\n", - "2024-10-08T13:25:51Z INFO Train Epoch: 2 [8960/60000 (59%)]\tloss=0.9489\n", - "2024-10-08T13:25:52Z INFO Train Epoch: 2 [10240/60000 (68%)]\tloss=0.8800\n", - "2024-10-08T13:25:52Z INFO Train Epoch: 2 [11520/60000 (76%)]\tloss=0.8957\n", - "2024-10-08T13:25:53Z INFO Train Epoch: 2 [12800/60000 (85%)]\tloss=0.8961\n", - "2024-10-08T13:25:54Z INFO Train Epoch: 2 [14080/60000 (93%)]\tloss=0.7958\n", - "2024-10-08T13:25:54Z INFO Training is finished\n", - "2024-10-08T13:25:54Z INFO Training is finished\n", - "[2024-10-08 13:25:59,467] torch.distributed.elastic.agent.server.api: [ERROR] Error waiting on exit barrier. 
Elapsed: 0.004681587219238281 seconds\n", - "[2024-10-08 13:25:59,468] torch.distributed.elastic.rendezvous.dynamic_rendezvous: [WARNING] The node 'train-pytorch-master-0_7_0' has failed to shutdown the rendezvous 'none' due to an error of type RendezvousConnectionError.\n", - "\n" - ] - } - ], - "source": [ - "logs, _ = training_client.get_job_logs(pytorchjob_name)\n", - "\n", - "print(logs[\"train-pytorch-master-0\"])" - ] - }, - { - "cell_type": "markdown", - "id": "17b0ca43-1936-4708-b03b-3ab9ac2bbdea", - "metadata": {}, - "source": [ - "## Delete PyTorchJob\n", - "\n", - "When PyTorchJob is finished, you can delete the resource." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "32ae88fd-5b5d-4ba1-a560-9a35c5ac17de", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "training_client.delete_job(pytorchjob_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9641e9f-551d-44d5-872b-002fffaedcef", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "training", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.20" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/pytorch/image-classification/create-pytorchjob.ipynb b/examples/pytorch/image-classification/create-pytorchjob.ipynb deleted file mode 100644 index f6cf4f9f67..0000000000 --- a/examples/pytorch/image-classification/create-pytorchjob.ipynb +++ /dev/null @@ -1,557 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# Create PyTorchJob using Kubeflow Training SDK" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": 
"#%% md\n" - } - }, - "source": [ - "This is a sample for Kubeflow Training SDK `kubeflow-training`.\n", - "\n", - "The notebook shows how to use Kubeflow Training SDK to create, get, wait, check and delete PyTorchJob." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "training_python_sdk='kubeflow-training'\n", - "namespace='kubeflow-user-example-com'" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Install Kubeflow Training Python SDKs\n", - "\n", - "You need to install Kubeflow Training SDK to run this Notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO (andreyvelich): Change to release version when SDK with the new APIs is published.\n", - "# Install Kubeflow Python SDK\n", - "!pip install {training_python_sdk}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "from kubernetes.client import V1PodTemplateSpec\n", - "from kubernetes.client import V1ObjectMeta\n", - "from kubernetes.client import V1PodSpec\n", - "from kubernetes.client import V1Container\n", - "\n", - "from kubeflow.training import KubeflowOrgV1ReplicaSpec\n", - "from kubeflow.training import KubeflowOrgV1PyTorchJob\n", - "from kubeflow.training import KubeflowOrgV1PyTorchJobSpec\n", - "from kubeflow.training import KubeflowOrgV1RunPolicy\n", - "from kubeflow.training import TrainingClient\n", - "\n", - "from kubeflow.training import constants" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Define PyTorchJob" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "The demo only creates a worker of PyTorchJob to run mnist sample." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "name = \"pytorch-dist-mnist-gloo\"\n", - "container_name = \"pytorch\"\n", - "\n", - "container = V1Container(\n", - " name=container_name,\n", - " image=\"kubeflow/pytorch-dist-mnist:latest\",\n", - " args=[\"--backend\", \"gloo\"],\n", - ")\n", - "\n", - "replica_spec = KubeflowOrgV1ReplicaSpec(\n", - " replicas=1,\n", - " restart_policy=\"OnFailure\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(\n", - " name=name,\n", - " namespace=namespace,\n", - " annotations={\n", - " \"sidecar.istio.io/inject\": \"false\"\n", - " }\n", - " ),\n", - " spec=V1PodSpec(\n", - " containers=[container]\n", - " )\n", - " )\n", - ")\n", - "\n", - "pytorchjob = KubeflowOrgV1PyTorchJob(\n", - " api_version=constants.API_VERSION,\n", - " kind=constants.PYTORCHJOB_KIND,\n", - " metadata=V1ObjectMeta(name=name, namespace=namespace),\n", - " spec=KubeflowOrgV1PyTorchJobSpec(\n", - " run_policy=KubeflowOrgV1RunPolicy(clean_pod_policy=\"None\"),\n", - " pytorch_replica_specs={\n", - " \"Master\": replica_spec,\n", - " \"Worker\": replica_spec\n", - " },\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Create PyTorchJob\n", - "\n", - "You have to create Training Client to deploy your PyTorchJob in you cluster." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "PyTorchJob kubeflow-user-example-com/pytorch-dist-mnist-gloo has been created\n" - ] - } - ], - "source": [ - "# Namespace will be reused in every APIs.\n", - "training_client = TrainingClient(namespace=namespace)\n", - "\n", - "# `job_kind` is set in `TrainingClient`\n", - "training_client.create_job(pytorchjob)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Get the Created PyTorchJob\n", - "\n", - "You can verify the created PyTorchJob name" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'pytorch-dist-mnist-gloo'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "training_client.get_job(name).metadata.name" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Get the PyTorchJob Conditions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'last_transition_time': datetime.datetime(2023, 9, 8, 21, 14, 59, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 14, 59, tzinfo=tzutc()),\n", - " 'message': 'PyTorchJob pytorch-dist-mnist-gloo is created.',\n", - " 'reason': 'PyTorchJobCreated',\n", - " 'status': 'True',\n", - " 'type': 'Created'},\n", - " {'last_transition_time': datetime.datetime(2023, 9, 8, 21, 15, 45, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 15, 45, tzinfo=tzutc()),\n", - " 'message': 'PyTorchJob pytorch-dist-mnist-gloo is running.',\n", - " 
'reason': 'JobRunning',\n", - " 'status': 'True',\n", - " 'type': 'Running'}]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "training_client.get_job_conditions(name=name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Wait Until PyTorchJob Finishes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "NAME STATE TIME\n", - "pytorch-dist-mnist-gloo Running 2023-09-08 21:15:45+00:00\n", - "pytorch-dist-mnist-gloo Running 2023-09-08 21:15:45+00:00\n", - "pytorch-dist-mnist-gloo Succeeded 2023-09-08 21:26:44+00:00\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Succeeded number of replicas: 1\n" - ] - } - ], - "source": [ - "pytorchjob = training_client.wait_for_job_conditions(name=name)\n", - "\n", - "print(f\"Succeeded number of replicas: {pytorchjob.status.replica_statuses['Master'].succeeded}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Verify if PyTorchJob is Succeeded" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "training_client.is_job_succeeded(name=name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Get the PyTorchJob Training Logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The logs of pod 
pytorch-dist-mnist-gloo-master-0:\n", - " Using distributed PyTorch with gloo backend\n", - "Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\n", - "Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\n", - "Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\n", - "Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\n", - "Processing...\n", - "Done!\n", - "Train Epoch: 1 [0/60000 (0%)]\tloss=2.3000\n", - "Train Epoch: 1 [640/60000 (1%)]\tloss=2.2135\n", - "Train Epoch: 1 [1280/60000 (2%)]\tloss=2.1704\n", - "Train Epoch: 1 [1920/60000 (3%)]\tloss=2.0766\n", - "Train Epoch: 1 [2560/60000 (4%)]\tloss=1.8679\n", - "Train Epoch: 1 [3200/60000 (5%)]\tloss=1.4135\n", - "Train Epoch: 1 [3840/60000 (6%)]\tloss=1.0003\n", - "Train Epoch: 1 [4480/60000 (7%)]\tloss=0.7762\n", - "Train Epoch: 1 [5120/60000 (9%)]\tloss=0.4598\n", - "Train Epoch: 1 [5760/60000 (10%)]\tloss=0.4860\n", - "Train Epoch: 1 [6400/60000 (11%)]\tloss=0.4389\n", - "Train Epoch: 1 [7040/60000 (12%)]\tloss=0.4084\n", - "Train Epoch: 1 [7680/60000 (13%)]\tloss=0.4602\n", - "Train Epoch: 1 [8320/60000 (14%)]\tloss=0.4289\n", - "Train Epoch: 1 [8960/60000 (15%)]\tloss=0.3990\n", - "Train Epoch: 1 [9600/60000 (16%)]\tloss=0.3852\n", - "Train Epoch: 1 [10240/60000 (17%)]\tloss=0.2984\n", - "Train Epoch: 1 [10880/60000 (18%)]\tloss=0.5029\n", - "Train Epoch: 1 [11520/60000 (19%)]\tloss=0.5236\n", - "Train Epoch: 1 [12160/60000 (20%)]\tloss=0.3378\n", - "Train Epoch: 1 [12800/60000 (21%)]\tloss=0.3674\n", - "Train Epoch: 1 [13440/60000 (22%)]\tloss=0.4508\n", - "Train Epoch: 1 [14080/60000 (23%)]\tloss=0.3034\n", - "Train Epoch: 1 [14720/60000 (25%)]\tloss=0.3574\n", - "Train Epoch: 1 [15360/60000 (26%)]\tloss=0.3313\n", - "Train Epoch: 1 [16000/60000 (27%)]\tloss=0.4405\n", - "Train Epoch: 1 [16640/60000 (28%)]\tloss=0.3642\n", - "Train Epoch: 1 [17280/60000 (29%)]\tloss=0.3172\n", - "Train Epoch: 1 [17920/60000 
(30%)]\tloss=0.2016\n", - "Train Epoch: 1 [18560/60000 (31%)]\tloss=0.4978\n", - "Train Epoch: 1 [19200/60000 (32%)]\tloss=0.3254\n", - "Train Epoch: 1 [19840/60000 (33%)]\tloss=0.1191\n", - "Train Epoch: 1 [20480/60000 (34%)]\tloss=0.1905\n", - "Train Epoch: 1 [21120/60000 (35%)]\tloss=0.1408\n", - "Train Epoch: 1 [21760/60000 (36%)]\tloss=0.3147\n", - "Train Epoch: 1 [22400/60000 (37%)]\tloss=0.1505\n", - "Train Epoch: 1 [23040/60000 (38%)]\tloss=0.2898\n", - "Train Epoch: 1 [23680/60000 (39%)]\tloss=0.4685\n", - "Train Epoch: 1 [24320/60000 (41%)]\tloss=0.2158\n", - "Train Epoch: 1 [24960/60000 (42%)]\tloss=0.1521\n", - "Train Epoch: 1 [25600/60000 (43%)]\tloss=0.2248\n", - "Train Epoch: 1 [26240/60000 (44%)]\tloss=0.2623\n", - "Train Epoch: 1 [26880/60000 (45%)]\tloss=0.2335\n", - "Train Epoch: 1 [27520/60000 (46%)]\tloss=0.2623\n", - "Train Epoch: 1 [28160/60000 (47%)]\tloss=0.2126\n", - "Train Epoch: 1 [28800/60000 (48%)]\tloss=0.1328\n", - "Train Epoch: 1 [29440/60000 (49%)]\tloss=0.2779\n", - "Train Epoch: 1 [30080/60000 (50%)]\tloss=0.0943\n", - "Train Epoch: 1 [30720/60000 (51%)]\tloss=0.1285\n", - "Train Epoch: 1 [31360/60000 (52%)]\tloss=0.2455\n", - "Train Epoch: 1 [32000/60000 (53%)]\tloss=0.3396\n", - "Train Epoch: 1 [32640/60000 (54%)]\tloss=0.1523\n", - "Train Epoch: 1 [33280/60000 (55%)]\tloss=0.0916\n", - "Train Epoch: 1 [33920/60000 (57%)]\tloss=0.1448\n", - "Train Epoch: 1 [34560/60000 (58%)]\tloss=0.1989\n", - "Train Epoch: 1 [35200/60000 (59%)]\tloss=0.2183\n", - "Train Epoch: 1 [35840/60000 (60%)]\tloss=0.0638\n", - "Train Epoch: 1 [36480/60000 (61%)]\tloss=0.1373\n", - "Train Epoch: 1 [37120/60000 (62%)]\tloss=0.1147\n", - "Train Epoch: 1 [37760/60000 (63%)]\tloss=0.2355\n", - "Train Epoch: 1 [38400/60000 (64%)]\tloss=0.0636\n", - "Train Epoch: 1 [39040/60000 (65%)]\tloss=0.1065\n", - "Train Epoch: 1 [39680/60000 (66%)]\tloss=0.1599\n", - "Train Epoch: 1 [40320/60000 (67%)]\tloss=0.1090\n", - "Train Epoch: 1 [40960/60000 
(68%)]\tloss=0.1774\n", - "Train Epoch: 1 [41600/60000 (69%)]\tloss=0.2307\n", - "Train Epoch: 1 [42240/60000 (70%)]\tloss=0.0736\n", - "Train Epoch: 1 [42880/60000 (71%)]\tloss=0.1553\n", - "Train Epoch: 1 [43520/60000 (72%)]\tloss=0.2793\n", - "Train Epoch: 1 [44160/60000 (74%)]\tloss=0.1428\n", - "Train Epoch: 1 [44800/60000 (75%)]\tloss=0.1179\n", - "Train Epoch: 1 [45440/60000 (76%)]\tloss=0.1205\n", - "Train Epoch: 1 [46080/60000 (77%)]\tloss=0.0767\n", - "Train Epoch: 1 [46720/60000 (78%)]\tloss=0.1946\n", - "Train Epoch: 1 [47360/60000 (79%)]\tloss=0.0703\n", - "Train Epoch: 1 [48000/60000 (80%)]\tloss=0.2094\n", - "Train Epoch: 1 [48640/60000 (81%)]\tloss=0.1378\n", - "Train Epoch: 1 [49280/60000 (82%)]\tloss=0.0950\n", - "Train Epoch: 1 [49920/60000 (83%)]\tloss=0.1066\n", - "Train Epoch: 1 [50560/60000 (84%)]\tloss=0.1182\n", - "Train Epoch: 1 [51200/60000 (85%)]\tloss=0.1455\n", - "Train Epoch: 1 [51840/60000 (86%)]\tloss=0.0684\n", - "Train Epoch: 1 [52480/60000 (87%)]\tloss=0.0241\n", - "Train Epoch: 1 [53120/60000 (88%)]\tloss=0.2626\n", - "Train Epoch: 1 [53760/60000 (90%)]\tloss=0.0922\n", - "Train Epoch: 1 [54400/60000 (91%)]\tloss=0.1301\n", - "Train Epoch: 1 [55040/60000 (92%)]\tloss=0.1921\n", - "Train Epoch: 1 [55680/60000 (93%)]\tloss=0.0346\n", - "Train Epoch: 1 [56320/60000 (94%)]\tloss=0.0358\n", - "Train Epoch: 1 [56960/60000 (95%)]\tloss=0.0767\n", - "Train Epoch: 1 [57600/60000 (96%)]\tloss=0.1167\n", - "Train Epoch: 1 [58240/60000 (97%)]\tloss=0.1932\n", - "Train Epoch: 1 [58880/60000 (98%)]\tloss=0.2062\n", - "Train Epoch: 1 [59520/60000 (99%)]\tloss=0.0647\n", - "\n", - "accuracy=0.9669\n", - "\n", - "\n" - ] - } - ], - "source": [ - "training_client.get_job_logs(name=name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Delete the PyTorchJob" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - 
}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "PyTorchJob kubeflow-user-example-com/pytorch-dist-mnist-gloo has been deleted\n" - ] - } - ], - "source": [ - "training_client.delete_job(name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/pytorch/language-modeling/train_api_hf_dataset.ipynb b/examples/pytorch/language-modeling/train_api_hf_dataset.ipynb deleted file mode 100644 index c5869196ca..0000000000 --- a/examples/pytorch/language-modeling/train_api_hf_dataset.ipynb +++ /dev/null @@ -1,145 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# install kubeflow-training extra 'huggingface'\n", - "!pip install -U 'kubeflow-training[huggingface]'" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# import the libraries\n", - "from kubeflow.training.api.training_client import TrainingClient\n", - "from kubeflow.storage_initializer.s3 import S3DatasetParams\n", - "from kubeflow.storage_initializer.hugging_face import (\n", - " HuggingFaceModelParams,\n", - " HuggingFaceTrainerParams,\n", - " HuggingFaceDatasetParams,\n", - ")\n", - "from kubeflow.storage_initializer.constants import INIT_CONTAINER_MOUNT_PATH\n", - "from peft import LoraConfig\n", - "import transformers\n", - "from transformers import TrainingArguments\n", - "from kubeflow.training import constants" - ] - }, - 
{ - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# create a training client, pass config_file parameter if you want to use kubeconfig other than \"~/.kube/config\"\n", - "client = TrainingClient()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "USING HUGGING FACE HUB AS THE DATASET STORE" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# mention the model, datasets and training parameters\n", - "client.train(\n", - " name=\"huggingface-test\",\n", - " num_workers=2,\n", - " num_procs_per_worker=1,\n", - " # specify the storage class if you don't want to use the default one for the storage-initializer PVC\n", - " # storage_config={\n", - " # \"size\": \"10Gi\",\n", - " # \"storage_class\": \"\",\n", - " # },\n", - " model_provider_parameters=HuggingFaceModelParams(\n", - " model_uri=\"hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0\",\n", - " transformer_type=transformers.AutoModelForCausalLM,\n", - " ),\n", - " # it is assumed for text related tasks, you have 'text' column in the dataset.\n", - " # for more info on how dataset is loaded check load_and_preprocess_data function in sdk/python/kubeflow/trainer/hf_llm_training.py\n", - " dataset_provider_parameters=HuggingFaceDatasetParams(repo_id=\"imdatta0/ultrachat_1k\"),\n", - " trainer_parameters=HuggingFaceTrainerParams(\n", - " lora_config=LoraConfig(\n", - " r=8,\n", - " lora_alpha=8,\n", - " lora_dropout=0.1,\n", - " bias=\"none\",\n", - " task_type=\"CAUSAL_LM\",\n", - " ),\n", - " training_parameters=TrainingArguments(\n", - " num_train_epochs=1,\n", - " per_device_train_batch_size=1,\n", - " gradient_accumulation_steps=1,\n", - " gradient_checkpointing=True,\n", - " gradient_checkpointing_kwargs={\n", - " \"use_reentrant\": False\n", - " }, # this is mandatory if checkpointng is enabled\n", - " warmup_steps=0.02,\n", - " learning_rate=1,\n", - " 
lr_scheduler_type=\"cosine\",\n", - " bf16=False,\n", - " logging_steps=0.01,\n", - " output_dir=INIT_CONTAINER_MOUNT_PATH,\n", - " optim=f\"sgd\",\n", - " save_steps=0.01,\n", - " save_total_limit=3,\n", - " disable_tqdm=False,\n", - " resume_from_checkpoint=True,\n", - " remove_unused_columns=True,\n", - " ddp_backend=\"nccl\", # change the backend to gloo if you want cpu based training and remove the gpu key in resources_per_worker\n", - " ),\n", - " ),\n", - " resources_per_worker={\n", - " \"gpu\": 1,\n", - " \"cpu\": 8,\n", - " \"memory\": \"8Gi\",\n", - " }, # remove the gpu key if you don't want to attach gpus to the pods\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# check the logs of the job\n", - "client.get_job_logs(name=\"huggingface-test\", job_kind=constants.PYTORCHJOB_KIND)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "myenv3.11", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/pytorch/language-modeling/train_api_s3_dataset.ipynb b/examples/pytorch/language-modeling/train_api_s3_dataset.ipynb deleted file mode 100644 index 332ba38c3e..0000000000 --- a/examples/pytorch/language-modeling/train_api_s3_dataset.ipynb +++ /dev/null @@ -1,162 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# install kubeflow-training extra 'huggingface'\n", - "!pip install -U 'kubeflow-training[huggingface]'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# import the libraries\n", - "from 
kubeflow.training.api.training_client import TrainingClient\n", - "from kubeflow.storage_initializer.hugging_face import (\n", - " HuggingFaceModelParams,\n", - " HuggingFaceTrainerParams,\n", - ")\n", - "from kubeflow.storage_initializer.s3 import S3DatasetParams\n", - "from kubeflow.storage_initializer.constants import INIT_CONTAINER_MOUNT_PATH\n", - "from peft import LoraConfig\n", - "import transformers\n", - "from transformers import TrainingArguments\n", - "from kubeflow.training import constants" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# create a training client, pass config_file parameter if you want to use kubeconfig other than \"~/.kube/config\"\n", - "client = TrainingClient()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "USING S3 AS THE DATASET SOURCE" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Need to set S3 credentials\n", - "s3_access_key = \"\"\n", - "s3_secret_key = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# mention the model, datasets and training parameters\n", - "client.train(\n", - " name=\"s3-test\",\n", - " num_workers=2,\n", - " num_procs_per_worker=1,\n", - " # specify the storage class if you don't want to use the default one for the storage-initializer PVC\n", - " # storage_config={\n", - " # \"size\": \"10Gi\",\n", - " # \"storage_class\": \"\",\n", - " # },\n", - " model_provider_parameters=HuggingFaceModelParams(\n", - " model_uri=\"hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0\",\n", - " transformer_type=transformers.AutoModelForCausalLM,\n", - " ),\n", - " # it is assumed for text related tasks, you have 'text' column in the dataset.\n", - " # for more info on how dataset is loaded check load_and_preprocess_data function in sdk/python/kubeflow/trainer/hf_llm_training.py\n", - " 
dataset_provider_parameters=S3DatasetParams(\n", - " endpoint_url=\"http://10.117.63.3\",\n", - " bucket_name=\"test\",\n", - " file_key=\"imdatta0___ultrachat_1k\",\n", - " region_name=\"us-east-1\",\n", - " access_key=s3_access_key,\n", - " secret_key=s3_secret_key,\n", - " ),\n", - " trainer_parameters=HuggingFaceTrainerParams(\n", - " lora_config=LoraConfig(\n", - " r=8,\n", - " lora_alpha=8,\n", - " lora_dropout=0.2,\n", - " bias=\"none\",\n", - " task_type=\"CAUSAL_LM\",\n", - " ),\n", - " training_parameters=TrainingArguments(\n", - " num_train_epochs=1,\n", - " per_device_train_batch_size=1,\n", - " gradient_accumulation_steps=1,\n", - " gradient_checkpointing=True,\n", - " gradient_checkpointing_kwargs={\n", - " \"use_reentrant\": False\n", - " }, # this is mandatory if checkpointng is enabled\n", - " warmup_steps=0.02,\n", - " learning_rate=1,\n", - " lr_scheduler_type=\"cosine\",\n", - " bf16=False,\n", - " logging_steps=0.01,\n", - " output_dir=INIT_CONTAINER_MOUNT_PATH,\n", - " optim=f\"sgd\",\n", - " save_steps=0.01,\n", - " save_total_limit=3,\n", - " disable_tqdm=False,\n", - " resume_from_checkpoint=True,\n", - " remove_unused_columns=True,\n", - " ddp_backend=\"nccl\", # change the backend to gloo if you want cpu based training and remove the gpu key in resources_per_worker\n", - " ),\n", - " ),\n", - " resources_per_worker={\n", - " \"gpu\": 1,\n", - " \"cpu\": 8,\n", - " \"memory\": \"8Gi\",\n", - " }, # remove the gpu key if you don't want to attach gpus to the pods\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# check the logs of the job\n", - "client.get_job_logs(name=\"s3-test\", job_kind=constants.PYTORCHJOB_KIND)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "myenv3.11", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": 
"text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/pytorch/mnist/Dockerfile b/examples/pytorch/mnist/Dockerfile deleted file mode 100644 index 92e6c5e220..0000000000 --- a/examples/pytorch/mnist/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux_arm64 platforms. -# PyTorch=2.2.0, cuda=12.3.2 -# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-01.html#rel-24-01 -FROM nvcr.io/nvidia/pytorch:24.01-py3 - -RUN pip install tensorboardX==2.6.2 -RUN mkdir -p /opt/mnist - -WORKDIR /opt/mnist/src -ADD mnist.py /opt/mnist/src/mnist.py - -RUN chgrp -R 0 /opt/mnist \ - && chmod -R g+rwX /opt/mnist - -ENTRYPOINT ["python", "/opt/mnist/src/mnist.py"] diff --git a/examples/pytorch/mnist/Dockerfile-mpi b/examples/pytorch/mnist/Dockerfile-mpi deleted file mode 100644 index 57b69ef692..0000000000 --- a/examples/pytorch/mnist/Dockerfile-mpi +++ /dev/null @@ -1,15 +0,0 @@ -# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux_arm64 platforms. 
-# PyTorch=2.2.0, cuda=12.3.2 -# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-01.html#rel-24-01 -FROM nvcr.io/nvidia/pytorch:24.01-py3 - -RUN pip install tensorboardX==2.6.2 -RUN mkdir -p /opt/mnist - -WORKDIR /opt/mnist/src -ADD mnist.py /opt/mnist/src/mnist.py - -RUN chgrp -R 0 /opt/mnist \ - && chmod -R g+rwX /opt/mnist - -ENTRYPOINT ["mpirun", "-n", "1", "--allow-run-as-root", "python", "/opt/mnist/src/mnist.py"] diff --git a/examples/pytorch/mnist/Makefile b/examples/pytorch/mnist/Makefile deleted file mode 100644 index 4d1ffe50f6..0000000000 --- a/examples/pytorch/mnist/Makefile +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Requirements: -# https://github.com/mattrobenolt/jinja2-cli -# pip install jinja2-clie -IMG = gcr.io/kubeflow-examples/pytorch-dist-mnist -PUBLIC = gcr.io/kubeflow-examples/pytorch-dist-mnist -DIR := ${CURDIR} - -# List any changed files. -CHANGED_FILES := $(shell git diff-files --relative=examples/dist-mnist) - -ifeq ($(strip $(CHANGED_FILES)),) -# Changed files is empty; not dirty -# Don't include --dirty because it could be dirty if files outside the ones we care -# about changed. 
-TAG := $(shell date +v%Y%m%d)-$(shell git describe --always) -else -TAG := $(shell date +v%Y%m%d)-$(shell git describe --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6) -endif - -all: build - -# To build without the cache set the environment variable -# export DOCKER_BUILD_OPTS=--no-cache -build: - docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) . - docker tag ${DOCKER_BUILD_OPTS} $(IMG):$(TAG) $(IMG):latest - @echo Built $(IMG):$(TAG) - -# Build but don't attach the latest tag. This allows manual testing/inspection of the image -# first. -push: build - gcloud docker -- push $(IMG):$(TAG) - @echo Pushed $(IMG) with :$(TAG) tags - -push-latest: push - gcloud container images add-tag --quiet $(IMG):$(TAG) $(IMG):latest --verbosity=info - echo created $(IMG):latest - -push-public: push-latest - gcloud container images add-tag --quiet $(IMG):$(TAG) $(PUBLIC):$(TAG) --verbosity=info - gcloud container images add-tag --quiet $(IMG):$(TAG) $(PUBLIC):latest --verbosity=info diff --git a/examples/pytorch/mnist/README.md b/examples/pytorch/mnist/README.md deleted file mode 100644 index 674fdfcc3d..0000000000 --- a/examples/pytorch/mnist/README.md +++ /dev/null @@ -1,26 +0,0 @@ -### Distributed MNIST Examples - -This folder contains an example where mnist is trained. This example is also used for e2e testing. - -The python script used to train mnist with pytorch takes in several arguments that can be used -to switch the distributed backends. The manifests to launch the distributed training of this mnist -file using the pytorch operator are under the respective version folders: [v1](./v1). -Each folder contains manifests with example usage of the different backends. - -**Note**: PyTorch job doesn’t work in a user namespace by default because of Istio [automatic sidecar injection](https://istio.io/v1.3/docs/setup/additional-setup/sidecar-injection/#automatic-sidecar-injection). 
In order to get it running, it needs annotation sidecar.istio.io/inject: "false" to disable it for either PyTorch pods or namespace. - -**Build Image** - -The default image name and tag is `kubeflow/pytorch-dist-mnist-test:1.0`. - -```shell -docker build -f Dockerfile -t kubeflow/pytorch-dist-mnist-test:1.0 ./ -``` - -**Create the mnist PyTorch job** - -The below example uses the gloo backend. - -```shell -kubectl create -f ./v1/pytorch_job_mnist_gloo.yaml -``` diff --git a/examples/pytorch/mnist/mnist.py b/examples/pytorch/mnist/mnist.py deleted file mode 100644 index 4ccd051999..0000000000 --- a/examples/pytorch/mnist/mnist.py +++ /dev/null @@ -1,224 +0,0 @@ -from __future__ import print_function - -import argparse -import os - -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -from tensorboardX import SummaryWriter -from torch.utils.data import DistributedSampler -from torchvision import datasets, transforms - - -class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 20, 5, 1) - self.conv2 = nn.Conv2d(20, 50, 5, 1) - self.fc1 = nn.Linear(4 * 4 * 50, 500) - self.fc2 = nn.Linear(500, 10) - - def forward(self, x): - x = F.relu(self.conv1(x)) - x = F.max_pool2d(x, 2, 2) - x = F.relu(self.conv2(x)) - x = F.max_pool2d(x, 2, 2) - x = x.view(-1, 4 * 4 * 50) - x = F.relu(self.fc1(x)) - x = self.fc2(x) - return F.log_softmax(x, dim=1) - - -def train(args, model, device, train_loader, epoch, writer): - model.train() - optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) - - for batch_idx, (data, target) in enumerate(train_loader): - # Attach tensors to the device. 
- data, target = data.to(device), target.to(device) - - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % args.log_interval == 0: - print( - "Train Epoch: {} [{}/{} ({:.0f}%)]\tloss={:.4f}".format( - epoch, - batch_idx * len(data), - len(train_loader.dataset), - 100.0 * batch_idx / len(train_loader), - loss.item(), - ) - ) - niter = epoch * len(train_loader) + batch_idx - writer.add_scalar("loss", loss.item(), niter) - - -def test(model, device, test_loader, writer, epoch): - model.eval() - - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - # Attach tensors to the device. - data, target = data.to(device), target.to(device) - - output = model(data) - # Get the index of the max log-probability. - pred = output.max(1, keepdim=True)[1] - correct += pred.eq(target.view_as(pred)).sum().item() - - print("\naccuracy={:.4f}\n".format(float(correct) / len(test_loader.dataset))) - writer.add_scalar("accuracy", float(correct) / len(test_loader.dataset), epoch) - - -def main(): - # Training settings - parser = argparse.ArgumentParser(description="PyTorch FashionMNIST Example") - parser.add_argument( - "--batch-size", - type=int, - default=64, - metavar="N", - help="input batch size for training (default: 64)", - ) - parser.add_argument( - "--test-batch-size", - type=int, - default=1000, - metavar="N", - help="input batch size for testing (default: 1000)", - ) - parser.add_argument( - "--epochs", - type=int, - default=1, - metavar="N", - help="number of epochs to train (default: 10)", - ) - parser.add_argument( - "--lr", - type=float, - default=0.01, - metavar="LR", - help="learning rate (default: 0.01)", - ) - parser.add_argument( - "--momentum", - type=float, - default=0.5, - metavar="M", - help="SGD momentum (default: 0.5)", - ) - parser.add_argument( - "--no-cuda", - action="store_true", - default=False, - help="disables CUDA training", - ) - parser.add_argument( - 
"--seed", - type=int, - default=1, - metavar="S", - help="random seed (default: 1)", - ) - parser.add_argument( - "--log-interval", - type=int, - default=10, - metavar="N", - help="how many batches to wait before logging training status", - ) - parser.add_argument( - "--save-model", - action="store_true", - default=False, - help="For Saving the current Model", - ) - parser.add_argument( - "--dir", - default="logs", - metavar="L", - help="directory where summary logs are stored", - ) - - parser.add_argument( - "--backend", - type=str, - help="Distributed backend", - choices=[dist.Backend.GLOO, dist.Backend.NCCL, dist.Backend.MPI], - default=dist.Backend.GLOO, - ) - - args = parser.parse_args() - use_cuda = not args.no_cuda and torch.cuda.is_available() - if use_cuda: - print("Using CUDA") - if args.backend != dist.Backend.NCCL: - print( - "Warning. Please use `nccl` distributed backend for the best performance using GPUs" - ) - - writer = SummaryWriter(args.dir) - - torch.manual_seed(args.seed) - - device = torch.device("cuda" if use_cuda else "cpu") - - # Attach model to the device. - model = Net().to(device) - - print("Using distributed PyTorch with {} backend".format(args.backend)) - # Set distributed training environment variables to run this training script locally. - if "WORLD_SIZE" not in os.environ: - os.environ["RANK"] = "0" - os.environ["WORLD_SIZE"] = "1" - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "1234" - - print(f"World Size: {os.environ['WORLD_SIZE']}. Rank: {os.environ['RANK']}") - - dist.init_process_group(backend=args.backend) - model = nn.parallel.DistributedDataParallel(model) - - # Get FashionMNIST train and test dataset. 
- train_ds = datasets.FashionMNIST( - "../data", - train=True, - download=True, - transform=transforms.Compose([transforms.ToTensor()]), - ) - test_ds = datasets.FashionMNIST( - "../data", - train=False, - download=True, - transform=transforms.Compose([transforms.ToTensor()]), - ) - # Add train and test loaders. - train_loader = torch.utils.data.DataLoader( - train_ds, - batch_size=args.batch_size, - sampler=DistributedSampler(train_ds), - ) - test_loader = torch.utils.data.DataLoader( - test_ds, - batch_size=args.test_batch_size, - sampler=DistributedSampler(test_ds), - ) - - for epoch in range(1, args.epochs + 1): - train(args, model, device, train_loader, epoch, writer) - test(model, device, test_loader, writer, epoch) - - if args.save_model: - torch.save(model.state_dict(), "mnist_cnn.pt") - - -if __name__ == "__main__": - main() diff --git a/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml b/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml deleted file mode 100644 index 3e42a2685f..0000000000 --- a/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: "PyTorchJob" -metadata: - name: "pytorch-dist-mnist-gloo" -spec: - pytorchReplicaSpecs: - Master: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest - args: ["--backend", "gloo"] - # Comment out the below resources to use the CPU. - resources: - limits: - nvidia.com/gpu: 1 - Worker: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest - args: ["--backend", "gloo"] - # Comment out the below resources to use the CPU. 
- resources: - limits: - nvidia.com/gpu: 1 diff --git a/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml b/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml deleted file mode 100644 index 53b8da80ea..0000000000 --- a/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: "PyTorchJob" -metadata: - name: "pytorch-dist-mnist-mpi" -spec: - pytorchReplicaSpecs: - Master: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest - args: ["--backend", "mpi"] - # Comment out the below resources to use the CPU. - resources: - limits: - nvidia.com/gpu: 1 - Worker: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest - args: ["--backend", "mpi"] - # Comment out the below resources to use the CPU. - resources: - limits: - nvidia.com/gpu: 1 diff --git a/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml b/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml deleted file mode 100644 index 0807abe32f..0000000000 --- a/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: "PyTorchJob" -metadata: - name: "pytorch-dist-mnist-nccl" -spec: - pytorchReplicaSpecs: - Master: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest - args: ["--backend", "nccl"] - resources: - limits: - nvidia.com/gpu: 1 - Worker: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest - args: ["--backend", "nccl"] - resources: - limits: - nvidia.com/gpu: 1 diff --git a/examples/pytorch/simple.yaml b/examples/pytorch/simple.yaml deleted file mode 100644 index 232d0ed69f..0000000000 --- a/examples/pytorch/simple.yaml +++ /dev/null @@ -1,33 +0,0 
@@ -apiVersion: "kubeflow.org/v1" -kind: PyTorchJob -metadata: - name: pytorch-simple - namespace: kubeflow -spec: - pytorchReplicaSpecs: - Master: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727 - imagePullPolicy: Always - command: - - "python3" - - "/opt/pytorch-mnist/mnist.py" - - "--epochs=1" - Worker: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727 - imagePullPolicy: Always - command: - - "python3" - - "/opt/pytorch-mnist/mnist.py" - - "--epochs=1" diff --git a/examples/pytorch/smoke-dist/Dockerfile b/examples/pytorch/smoke-dist/Dockerfile deleted file mode 100644 index 2760ee2e6f..0000000000 --- a/examples/pytorch/smoke-dist/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux_arm64 platforms. -# PyTorch=2.2.0, cuda=12.3.2 -# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-01.html#rel-24-01 -FROM nvcr.io/nvidia/pytorch:24.01-py3 - -RUN mkdir -p /opt/mlkube -COPY examples/pytorch/smoke-dist/dist_sendrecv.py /opt/mlkube/ -ENTRYPOINT ["python", "/opt/mlkube/dist_sendrecv.py"] diff --git a/examples/pytorch/smoke-dist/README.md b/examples/pytorch/smoke-dist/README.md deleted file mode 100644 index ccfc1c928c..0000000000 --- a/examples/pytorch/smoke-dist/README.md +++ /dev/null @@ -1,17 +0,0 @@ -### Distributed send/recv e2e test - -This folder containers Dockerfile and distributed send/recv test. - -**Build Image** - -The default image name and tag is `kubeflow/pytorch-dist-sendrecv-test:1.0`. 
- -```shell -docker build -f Dockerfile -t kubeflow/pytorch-dist-sendrecv-test:1.0 ./ -``` - -**Create the PyTorch job** - -``` -kubectl create -f ./pytorch_job_sendrecv.yaml -``` diff --git a/examples/pytorch/smoke-dist/dist_sendrecv.py b/examples/pytorch/smoke-dist/dist_sendrecv.py deleted file mode 100644 index 287f38b284..0000000000 --- a/examples/pytorch/smoke-dist/dist_sendrecv.py +++ /dev/null @@ -1,56 +0,0 @@ -import logging -import os - -import torch -import torch.distributed as dist - - -def run(): - """Simple Send/Recv for testing Master <--> Workers communication""" - rank = dist.get_rank() - size = dist.get_world_size() - inp = torch.randn(2, 2) - result = torch.zeros(2, 2) - if rank == 0: - # Send the input tensor to all workers - for i in range(1, size): - dist.send(tensor=inp, dst=i) - # Receive the result tensor from all workers - dist.recv(tensor=result, src=i) - logging.info("Result from worker %d : %s", i, result) - else: - # Receive input tensor from master - dist.recv(tensor=inp, src=0) - # Elementwise tensor multiplication - result = torch.mul(inp, inp) - # Send the result tensor back to master - dist.send(tensor=result, dst=0) - - -def init_processes(fn, backend="gloo"): - """Initialize the distributed environment.""" - dist.init_process_group(backend) - fn() - - -def main(): - logging.info("Torch version: %s", torch.__version__) - - port = os.environ.get("MASTER_PORT", "{}") - logging.info("MASTER_PORT: %s", port) - - addr = os.environ.get("MASTER_ADDR", "{}") - logging.info("MASTER_ADDR: %s", addr) - - world_size = os.environ.get("WORLD_SIZE", "{}") - logging.info("WORLD_SIZE: %s", world_size) - - rank = os.environ.get("RANK", "{}") - logging.info("RANK: %s", rank) - - init_processes(run) - - -if __name__ == "__main__": - logging.getLogger().setLevel(logging.INFO) - main() diff --git a/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml b/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml deleted file mode 100644 index 
fc19bc210e..0000000000 --- a/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: "PyTorchJob" -metadata: - name: "pytorch-dist-basic-sendrecv" -spec: - pytorchReplicaSpecs: - Master: - replicas: 1 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-dist-sendrecv-test:latest - Worker: - replicas: 3 - restartPolicy: OnFailure - template: - spec: - containers: - - name: pytorch - image: kubeflow/pytorch-dist-sendrecv-test:latest diff --git a/examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb b/examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb deleted file mode 100644 index d5afd1910c..0000000000 --- a/examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb +++ /dev/null @@ -1,882 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Fine-Tune BERT LLM for Sentiment Analysis with Kubeflow PyTorchJob" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This Notebook will fine-tune Bidirectional Encoder Representations from Transformers (BERT) model with Yelp dataset to analyze text sentiment using distributed training with [Kubeflow PyTorchJob](https://www.kubeflow.org/docs/components/training/overview/).\n", - "\n", - "Pretrained BERT model: https://huggingface.co/google-bert/bert-base-cased\n", - "\n", - "Yelp review full dataset: https://huggingface.co/datasets/yelp_review_full\n", - "\n", - "This Notebook requires:\n", - "\n", - "- At least **3 GPU** on your Kubernetes cluster to fine-tune BERT model on 3 workers.\n", - "- AWS S3 bucket to export fine-tuned model.\n", - "\n", - "This example is based on [the HuggingFace fine-tuning tutorial](https://huggingface.co/docs/transformers/en/training)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Install required packages\n", - "\n", - "We need to install HuggingFace packages to run this Notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "!pip install transformers datasets boto3\n", - "\n", - "!pip install git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Get samples from Yelp reviews dataset\n", - "\n", - "The Yelp reviews full star dataset is constructed by randomly taking 130,000 training samples and 10,000 testing samples for each review star from 1 to 5.\n", - "\n", - "In total there are 650,000 training samples and 50,000 testing samples.\n", - "\n", - "We are going to use this dataset to fine-tune BERT model." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'label': 4,\n", - " 'text': \"Top notch doctor in a top notch practice. Can't say I am surprised \"\n", - " 'when I was referred to him by another doctor who I think is '\n", - " 'wonderful and because he went to one of the best medical schools in '\n", - " 'the country. \\\\nIt is really easy to get an appointment. There is '\n", - " 'minimal wait to be seen and his bedside manner is great.'}\n", - "{'label': 1,\n", - " 'text': 'Average run of the mill store. Associates are young teens and they '\n", - " \"really don't know where anything is. Luckily I am able to get \"\n", - " 'around to find everything. 
Found my puppy treats and moved on.'}\n" - ] - } - ], - "source": [ - "from pprint import pprint\n", - "\n", - "from datasets import load_dataset\n", - "\n", - "# Test only 100 samples in the Notebook.\n", - "dataset = load_dataset(\"yelp_review_full\", split=\"train[:100]\")\n", - "\n", - "# Print some test data.\n", - "pprint(dataset[5])\n", - "pprint(dataset[30])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create script to fine-tune BERT model\n", - "\n", - "We need to wrap our fine-tuning script in a function to create Kubeflow PyTorchJob." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def train_func(parameters):\n", - " import os\n", - "\n", - " import boto3\n", - " import evaluate\n", - " import numpy as np\n", - " from datasets import load_dataset\n", - " from datasets.distributed import split_dataset_by_node\n", - " from transformers import (\n", - " AutoModelForSequenceClassification,\n", - " AutoTokenizer,\n", - " Trainer,\n", - " TrainingArguments,\n", - " )\n", - "\n", - " # [1] Download BERT model, tokenizer, and Yelp dataset.\n", - " print(\"-\" * 40)\n", - " print(\"Download BERT Model\")\n", - " model = AutoModelForSequenceClassification.from_pretrained(\n", - " \"bert-base-cased\",\n", - " num_labels=5,\n", - " )\n", - " tokenizer = AutoTokenizer.from_pretrained(\"bert-base-cased\")\n", - "\n", - " print(\"-\" * 40)\n", - " print(\"Download Yelp Review Dataset\")\n", - "\n", - " # Use only 4000 data samples to reduce tokenization and training time.\n", - " # Training samples - 3600, test samples - 400\n", - " # Remove split to take all samples: dataset = load_dataset(\"yelp_review_full\")\n", - " dataset = load_dataset(\"yelp_review_full\", split=\"train[:4000]\")\n", - " dataset = dataset.train_test_split(test_size=0.1, stratify_by_column=\"label\")\n", - "\n", - " # [2] Preprocess dataset.\n", - " def 
tokenize_function(examples):\n", - " return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True)\n", - "\n", - " # Map Yelp review dataset to BERT tokenizer.\n", - " print(\"-\" * 40)\n", - " print(\"Map Yelp review dataset to BERT Tokenizer\")\n", - " tokenized_ds = dataset.map(tokenize_function, batched=True)\n", - "\n", - " # Distribute train and test datasets between PyTorch workers.\n", - " # Every worker will process chunk of training data.\n", - " # RANK and WORLD_SIZE will be set by Kubeflow Training Operator.\n", - " RANK = int(os.environ[\"RANK\"])\n", - " WORLD_SIZE = int(os.environ[\"WORLD_SIZE\"])\n", - " distributed_ds_train = split_dataset_by_node(\n", - " tokenized_ds[\"train\"],\n", - " rank=RANK,\n", - " world_size=WORLD_SIZE,\n", - " )\n", - " distributed_ds_test = split_dataset_by_node(\n", - " tokenized_ds[\"test\"],\n", - " rank=RANK,\n", - " world_size=WORLD_SIZE,\n", - " )\n", - "\n", - " # Evaluate accuracy.\n", - " metric = evaluate.load(\"accuracy\")\n", - "\n", - " def compute_metrics(eval_pred):\n", - " logits, labels = eval_pred\n", - " predictions = np.argmax(logits, axis=-1)\n", - " return metric.compute(predictions=predictions, references=labels)\n", - "\n", - " # [3] Define Training args.\n", - " training_args = TrainingArguments(\n", - " output_dir=\"test_trainer\",\n", - " evaluation_strategy=\"epoch\",\n", - " disable_tqdm=True,\n", - " log_level=\"info\",\n", - " )\n", - "\n", - " # [4] Define Trainer.\n", - " trainer = Trainer(\n", - " model=model,\n", - " args=training_args,\n", - " train_dataset=distributed_ds_train,\n", - " eval_dataset=distributed_ds_test,\n", - " compute_metrics=compute_metrics,\n", - " )\n", - "\n", - " # [5] Fine-tune model.\n", - " print(\"-\" * 40)\n", - " print(f\"Start Distributed Training. 
RANK: {RANK} WORLD_SIZE: {WORLD_SIZE}\")\n", - "\n", - " trainer.train()\n", - "\n", - " print(\"-\" * 40)\n", - " print(\"Training is complete\")\n", - "\n", - " # [6] Export trained model to S3 from the worker with RANK = 0.\n", - " if RANK == 0:\n", - " trainer.save_model(\"./bert\")\n", - " s3 = boto3.resource(\"s3\")\n", - " bucket = s3.Bucket(parameters[\"BUCKET\"])\n", - " bucket.upload_file(\"bert/config.json\", \"bert/config.json\")\n", - " bucket.upload_file(\"bert/model.safetensors\", \"bert/model.safetensors\")\n", - "\n", - " print(\"-\" * 40)\n", - " print(\"Model is exported to S3\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create Kubeflow PyTorchJob to fine-tune BERT on GPUs\n", - "\n", - "Use `TrainingClient()` to create PyTorchJob which will fine-tune BERT on **3 workers** using **1 GPU** for each worker.\n", - "\n", - "Your Kubernetes cluster should have sufficient **GPU** resources available." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import uuid\n", - "from kubeflow.training import TrainingClient\n", - "\n", - "job_name = \"fine-tune-bert\"\n", - "\n", - "# Replace `kubeflow-examples` with your AWS S3 bucket.\n", - "bucket = \"kubeflow-examples\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Create PyTorchJob\n", - "TrainingClient().create_job(\n", - " name=job_name,\n", - " train_func=train_func,\n", - " parameters={\"BUCKET\": bucket},\n", - " num_workers=3, # Number of PyTorch workers to use.\n", - " resources_per_worker={\n", - " \"cpu\": \"4\",\n", - " \"memory\": \"10G\",\n", - " \"gpu\": \"1\",\n", - " },\n", - " packages_to_install=[\n", - " \"boto3\",\n", - " \"transformers\",\n", - " \"datasets\",\n", - " \"evaluate\",\n", - " \"accelerate\",\n", - " \"scikit-learn\",\n", - " ], # PIP packages will be installed during PyTorchJob 
runtime.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Check the PyTorchJob conditions\n", - "\n", - "Use `TrainingClient()` APIs to get information about created PyTorchJob." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PyTorchJob Conditions\n", - "[{'last_transition_time': datetime.datetime(2024, 3, 15, 16, 31, 30, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 15, 16, 31, 30, tzinfo=tzutc()),\n", - " 'message': 'PyTorchJob fine-tune-bert is created.',\n", - " 'reason': 'PyTorchJobCreated',\n", - " 'status': 'True',\n", - " 'type': 'Created'}, {'last_transition_time': datetime.datetime(2024, 3, 15, 16, 31, 31, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 15, 16, 31, 31, tzinfo=tzutc()),\n", - " 'message': 'PyTorchJob fine-tune-bert is running.',\n", - " 'reason': 'PyTorchJobRunning',\n", - " 'status': 'True',\n", - " 'type': 'Running'}]\n", - "----------------------------------------\n", - "PyTorchJob is running\n" - ] - } - ], - "source": [ - "print(\"PyTorchJob Conditions\")\n", - "print(TrainingClient().get_job_conditions(job_name))\n", - "print(\"-\" * 40)\n", - "\n", - "# Wait until PyTorchJob has Running condition.\n", - "job = TrainingClient().wait_for_job_conditions(\n", - " job_name,\n", - " expected_conditions={\"Running\"},\n", - ")\n", - "print(\"PyTorchJob is running\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the PyTorchJob pod names\n", - "\n", - "Since we set 3 workers, PyTorchJob will create 1 master pod and 2 worker pods to execute distributed training." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['fine-tune-bert-master-0',\n", - " 'fine-tune-bert-worker-0',\n", - " 'fine-tune-bert-worker-1']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "TrainingClient().get_job_pod_names(job_name)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "execution": { - "iopub.status.busy": "2022-09-01T20:10:25.759950Z", - "iopub.status.idle": "2022-09-01T20:10:25.760581Z", - "shell.execute_reply": "2022-09-01T20:10:25.760353Z", - "shell.execute_reply.started": "2022-09-01T20:10:25.760328Z" - }, - "tags": [] - }, - "source": [ - "### Get the PyTorchJob training logs\n", - "\n", - "Every worker processes 1200 training samples on each epoch since we distribute 3600 training samples across 3 workers." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Pod fine-tune-bert-master-0]: WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", - "[Pod fine-tune-bert-master-0]: WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", - "[Pod fine-tune-bert-master-0]: ----------------------------------------\n", - "[Pod fine-tune-bert-master-0]: Download BERT Model\n", - "[Pod fine-tune-bert-master-0]: Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "[Pod fine-tune-bert-master-0]: You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", - "[Pod fine-tune-bert-master-0]: ----------------------------------------\n", - "[Pod fine-tune-bert-master-0]: Download Yelp Review Dataset\n", - "Downloading readme: 100%|██████████| 6.72k/6.72k [00:00<00:00, 30.2MB/s]\n", - "Downloading data: 100%|██████████| 299M/299M [00:05<00:00, 59.7MB/s] \n", - "Downloading data: 100%|██████████| 23.5M/23.5M [00:00<00:00, 51.6MB/s]\n", - "Generating train split: 100%|██████████| 650000/650000 [00:01<00:00, 368141.59 examples/s]\n", - "Generating test split: 100%|██████████| 50000/50000 [00:00<00:00, 360107.08 examples/s]\n", - "[Pod fine-tune-bert-master-0]: ----------------------------------------\n", - "[Pod fine-tune-bert-master-0]: Map Yelp review dataset to BERT Tokenizer\n", - "Map: 100%|██████████| 3600/3600 [00:01<00:00, 2452.88 examples/s]\n", - "Map: 100%|██████████| 400/400 [00:00<00:00, 2591.52 examples/s]\n", - "Downloading builder script: 100%|██████████| 4.20k/4.20k [00:00<00:00, 15.9MB/s]\n", - "[Pod fine-tune-bert-master-0]: /opt/conda/lib/python3.10/site-packages/accelerate/state.py:313: UserWarning: OMP_NUM_THREADS/MKL_NUM_THREADS unset, we set it at 16 to improve oob performance.\n", - "[Pod fine-tune-bert-master-0]: warnings.warn(\n", - "[Pod fine-tune-bert-master-0]: /opt/conda/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated 
and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", - "[Pod fine-tune-bert-master-0]: dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", - "[Pod fine-tune-bert-master-0]: warnings.warn(\n", - "[Pod fine-tune-bert-master-0]: ----------------------------------------\n", - "[Pod fine-tune-bert-master-0]: Start Distributed Training. RANK: 0 WORLD_SIZE: 3\n", - "[Pod fine-tune-bert-master-0]: The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`, you can safely ignore this message.\n", - "[Pod fine-tune-bert-master-0]: ***** Running training *****\n", - "[Pod fine-tune-bert-master-0]: Num examples = 1,200\n", - "[Pod fine-tune-bert-master-0]: Num Epochs = 3\n", - "[Pod fine-tune-bert-master-0]: Instantaneous batch size per device = 8\n", - "[Pod fine-tune-bert-master-0]: Total train batch size (w. parallel, distributed & accumulation) = 24\n", - "[Pod fine-tune-bert-master-0]: Gradient Accumulation steps = 1\n", - "[Pod fine-tune-bert-master-0]: Total optimization steps = 150\n", - "[Pod fine-tune-bert-master-0]: Number of trainable parameters = 108,314,117\n", - "[Pod fine-tune-bert-master-0]: [W reducer.cpp:1346] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. 
Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n", - "[Pod fine-tune-bert-master-0]: The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`, you can safely ignore this message.\n", - "[Pod fine-tune-bert-master-0]: ***** Running Evaluation *****\n", - "[Pod fine-tune-bert-master-0]: Num examples = 134\n", - "[Pod fine-tune-bert-master-0]: Batch size = 8\n", - "[Pod fine-tune-bert-master-0]: {'eval_loss': 1.0521148443222046, 'eval_accuracy': 0.5746268656716418, 'eval_runtime': 0.5213, 'eval_samples_per_second': 257.033, 'eval_steps_per_second': 11.509, 'epoch': 1.0}\n", - "[Pod fine-tune-bert-master-0]: The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`, you can safely ignore this message.\n", - "[Pod fine-tune-bert-master-0]: ***** Running Evaluation *****\n", - "[Pod fine-tune-bert-master-0]: Num examples = 134\n", - "[Pod fine-tune-bert-master-0]: Batch size = 8\n", - "[Pod fine-tune-bert-master-0]: {'eval_loss': 0.9855704307556152, 'eval_accuracy': 0.5895522388059702, 'eval_runtime': 0.5239, 'eval_samples_per_second': 255.763, 'eval_steps_per_second': 11.452, 'epoch': 2.0}\n", - "[Pod fine-tune-bert-master-0]: The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `BertForSequenceClassification.forward`, you can safely ignore this message.\n", - "[Pod fine-tune-bert-master-0]: ***** Running Evaluation *****\n", - "[Pod fine-tune-bert-master-0]: Num examples = 134\n", - "[Pod fine-tune-bert-master-0]: Batch size = 8\n", - "[Pod fine-tune-bert-master-0]: {'eval_loss': 0.9247522354125977, 'eval_accuracy': 0.6492537313432836, 'eval_runtime': 0.527, 'eval_samples_per_second': 254.259, 'eval_steps_per_second': 11.385, 'epoch': 3.0}\n", - "[Pod fine-tune-bert-master-0]: Training completed. Do not forget to share your model on huggingface.co/models =)\n", - "[Pod fine-tune-bert-master-0]: {'train_runtime': 73.331, 'train_samples_per_second': 49.092, 'train_steps_per_second': 2.046, 'train_loss': 1.0898309326171876, 'epoch': 3.0}\n", - "[Pod fine-tune-bert-master-0]: ----------------------------------------\n", - "[Pod fine-tune-bert-master-0]: Training is complete\n", - "[Pod fine-tune-bert-master-0]: Saving model checkpoint to ./bert\n", - "[Pod fine-tune-bert-master-0]: Configuration saved in ./bert/config.json\n", - "[Pod fine-tune-bert-master-0]: Model weights saved in ./bert/model.safetensors\n", - "[Pod fine-tune-bert-master-0]: ----------------------------------------\n", - "[Pod fine-tune-bert-master-0]: Model is exported to S3\n" - ] - } - ], - "source": [ - "logs, _ = TrainingClient().get_job_logs(job_name, follow=True)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download the fine-tuned model\n", - "\n", - "We can download our fine-tuned BERT model from S3 to evaluate it." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import boto3\n", - "import os\n", - "\n", - "s3 = boto3.resource(\"s3\")\n", - "bucket = s3.Bucket(bucket)\n", - "\n", - "# config.json is the model metadata.\n", - "# model.safetensors is the model weights & biases.\n", - "if not os.path.exists(\"bert\"):\n", - " os.makedirs(\"bert\")\n", - "bucket.download_file(\"bert/config.json\", \"bert/config.json\")\n", - "bucket.download_file(\"bert/model.safetensors\", \"bert/model.safetensors\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Test the fine-tuned BERT model\n", - "\n", - "We are going to use HuggingFace pipeline to test our model.\n", - "\n", - "We will ask for sentiment analysis task for our fine-tuned LLM." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "This is one of the best restaurants I've ever been to.\n", - "Star: 4\n", - "Score: 0.806443452835083\n", - "---------------------------\n", - "\n", - "\n", - "I am upset by using this service. It is very expensive and quality is bad.\n", - "Star: 1\n", - "Score: 0.6581875085830688\n", - "---------------------------\n" - ] - } - ], - "source": [ - "from transformers import AutoTokenizer, pipeline\n", - "\n", - "# During fine-tuning BERT tokenizer is not changed.\n", - "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-cased\")\n", - "\n", - "# Use pipeline with sentiment-analysis task to evaluate our model.\n", - "nlp = pipeline(\"sentiment-analysis\", model=\"./bert\", tokenizer=tokenizer)\n", - "\n", - "good_review = \"This is one of the best restaurants I've ever been to.\"\n", - "bad_review = \"I am upset by using this service. 
It is very expensive and quality is bad.\"\n", - "\n", - "print(good_review)\n", - "res = nlp(good_review)\n", - "\n", - "print(\"Star: \", res[0][\"label\"][6])\n", - "print(\"Score: \", res[0][\"score\"])\n", - "print(\"---------------------------\\n\\n\")\n", - "\n", - "\n", - "print(bad_review)\n", - "res = nlp(bad_review)\n", - "\n", - "print(\"Star: \", res[0][\"label\"][6])\n", - "print(\"Score: \", res[0][\"score\"])\n", - "print(\"---------------------------\")" - ] - }, - { - "attachments": { - "348c13f1-f7df-4148-9c2e-268c05dc1d16.png": { - "image/png": "" - } - }, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Use Train API to Fine-Tune BERT LLM\n", - "\n", - "Kubeflow Training Operator SDK implements a `train` API to effectively fine-tune LLMs on multiple PyTorchJob workers with required configuration. It uses storage initializer to download pre-trained model and dataset, and distribute it across PyTorchJob workers using shared PVCs. After initialization step, pre-created HuggingFace LLM trainer will be executed on each PyTorchJob worker to fine-tune BERT model.\n", - "\n", - "This feature is in **Development Phase**, please provide your feedback by creating [the GitHub issues](https://github.com/kubeflow/training-operator/issues/new) or by using [the Kubeflow Slack channel #kubeflow-training-operator](https://kubeflow.slack.com/archives/C985VJN9F).\n", - "\n", - "To learn more about it check [this proposal](https://github.com/kubeflow/training-operator/blob/master/docs/proposals/train_api_proposal.md).\n", - "\n", - "**TODO (andreyvelich)**: Add docs link when they are ready.\n", - "\n", - "![train-api.png](attachment:348c13f1-f7df-4148-9c2e-268c05dc1d16.png)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install Training Operator SDK to use `train` API\n", - "\n", - "You have to install `kubeflow-training` SDK with the HuggingFace dependencies to use `train` API.\n" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install \"kubeflow-training[huggingface] @ git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create PyTorchJob using train API\n", - "\n", - "If your Kubernetes environment [supports `ReadOnlyMany` and `ReadWriteOnce` access modes](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes) for PersistentVolumeClaims (PVCs), you can use more than 1 PyTorchJob worker in `train` API." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "from kubeflow.training import TrainingClient\n", - "from kubeflow.storage_initializer.hugging_face import (\n", - " HuggingFaceModelParams,\n", - " HuggingFaceTrainerParams,\n", - " HuggingFaceDatasetParams,\n", - ")\n", - "\n", - "import transformers\n", - "from peft import LoraConfig\n", - "\n", - "job_name_train_api = \"fine-tune-bert-train-api\"\n", - "\n", - "# Set TOKENIZERS_PARALLELISM = false to avoid warnings from Transformers.\n", - "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "# In this example we will use 1 worker and 1 GPU to fine-tune BERT with `train` API.\n", - "TrainingClient().train(\n", - " name=job_name_train_api,\n", - " num_workers=1, # nnodes parameter for torchrun command.\n", - " num_procs_per_worker=1, # nproc-per-node parameter for torchrun command.\n", - " # BERT model URI and type of Transformer to train it.\n", - " model_provider_parameters=HuggingFaceModelParams(\n", - " model_uri=\"hf://google-bert/bert-base-cased\",\n", - " transformer_type=transformers.AutoModelForSequenceClassification,\n", - " ),\n", - " storage_config={\n", - " \"access_modes\": [\"ReadWriteOnce\"] # Since we use 1 Worker, PVC access mode 
is ReadWriteOnce.\n", - " },\n", - " # Use 3000 samples from Yelp dataset.\n", - " dataset_provider_parameters=HuggingFaceDatasetParams(\n", - " repo_id=\"yelp_review_full\",\n", - " split=\"train[:3000]\",\n", - " ),\n", - " # Specify HuggingFace Trainer parameters. In this example, we will skip evaluation and model checkpoints.\n", - " trainer_parameters=HuggingFaceTrainerParams(\n", - " training_parameters=transformers.TrainingArguments(\n", - " output_dir=\"test_trainer\",\n", - " save_strategy=\"no\",\n", - " evaluation_strategy=\"no\",\n", - " do_eval=False,\n", - " disable_tqdm=True,\n", - " log_level=\"info\",\n", - " ),\n", - " # Set LoRA config to reduce number of trainable model parameters. \n", - " lora_config=LoraConfig(\n", - " r=8,\n", - " lora_alpha=8,\n", - " lora_dropout=0.1,\n", - " bias=\"none\",\n", - " ),\n", - " ),\n", - " resources_per_worker={\n", - " \"gpu\": 1,\n", - " \"cpu\": 5,\n", - " \"memory\": \"10G\",\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the PyTorchJob containers\n", - "\n", - "When using `train` API, every PyTorchJob worker (Kubernetes Pod) should have `storage-initialize` initContainer and volume.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PyTorchJob Init Containers\n", - "storage-initializer\n", - "----------------------------------------\n", - "PyTorchJob Volumes\n", - "storage-initializer\n" - ] - } - ], - "source": [ - "pytorchjob = TrainingClient().get_job(job_name_train_api)\n", - "\n", - "print(\"PyTorchJob Init Containers\")\n", - "for c in pytorchjob.spec.pytorch_replica_specs[\"Master\"].template.spec.init_containers:\n", - " print(c.name)\n", - "\n", - "print(\"-\" * 40)\n", - "\n", - "print(\"PyTorchJob Volumes\")\n", - "for v in pytorchjob.spec.pytorch_replica_specs[\"Master\"].template.spec.volumes:\n", - " print(v.name)" - ] - }, 
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get the PyTorchJob training logs\n", - "\n", - "Use the same API to get created PyTorchJob logs.\n", - "\n", - "Since we used LoRA config, number of trainable parameters is smaller: **294 912**" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:47Z INFO Starting HuggingFace LLM Trainer\n", - "[Pod fine-tune-bert-train-api-master-0]: /usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.\n", - "[Pod fine-tune-bert-train-api-master-0]: warnings.warn(\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:47Z INFO Setup model and tokenizer\n", - "[Pod fine-tune-bert-train-api-master-0]: Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "[Pod fine-tune-bert-train-api-master-0]: You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:48Z INFO Preprocess dataset\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:48Z INFO Load and preprocess dataset\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:48Z INFO Dataset specification: Dataset({\n", - "[Pod fine-tune-bert-train-api-master-0]: features: ['label', 'text'],\n", - "[Pod fine-tune-bert-train-api-master-0]: num_rows: 3000\n", - "[Pod fine-tune-bert-train-api-master-0]: })\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:48Z INFO ----------------------------------------\n", - "[Pod fine-tune-bert-train-api-master-0]: 
2024-03-15T16:45:48Z INFO Tokenize dataset\n", - "Map: 100%|██████████| 3000/3000 [00:01<00:00, 2759.84 examples/s]\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:51Z INFO Evaluation dataset is not found\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:51Z INFO Distributed dataset across PyTorchJob workers. WORLD_SIZE: 1, RANK: 0\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:51Z INFO Setup LoRA config for model\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:51Z INFO Start model training\n", - "[Pod fine-tune-bert-train-api-master-0]: /usr/local/lib/python3.10/dist-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", - "[Pod fine-tune-bert-train-api-master-0]: dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)\n", - "[Pod fine-tune-bert-train-api-master-0]: warnings.warn(\n", - "[Pod fine-tune-bert-train-api-master-0]: The following columns in the training set don't have a corresponding argument in `PeftModel.forward` and have been ignored: text. If text are not expected by `PeftModel.forward`, you can safely ignore this message.\n", - "[Pod fine-tune-bert-train-api-master-0]: ***** Running training *****\n", - "[Pod fine-tune-bert-train-api-master-0]: Num examples = 3,000\n", - "[Pod fine-tune-bert-train-api-master-0]: Num Epochs = 3\n", - "[Pod fine-tune-bert-train-api-master-0]: Instantaneous batch size per device = 8\n", - "[Pod fine-tune-bert-train-api-master-0]: Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", - "[Pod fine-tune-bert-train-api-master-0]: Gradient Accumulation steps = 1\n", - "[Pod fine-tune-bert-train-api-master-0]: Total optimization steps = 1,125\n", - "[Pod fine-tune-bert-train-api-master-0]: Number of trainable parameters = 294,912\n", - "[Pod fine-tune-bert-train-api-master-0]: [W reducer.cpp:1346] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:47Z INFO Starting HuggingFace LLM Trainer\n", - "[Pod fine-tune-bert-train-api-master-0]: /usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. 
Use `--hub_token` instead.\n", - "[Pod fine-tune-bert-train-api-master-0]: warnings.warn(\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:47Z INFO Setup model and tokenizer\n", - "[Pod fine-tune-bert-train-api-master-0]: Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "[Pod fine-tune-bert-train-api-master-0]: You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:48Z INFO Preprocess dataset\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:48Z INFO Load and preprocess dataset\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:48Z INFO Dataset specification: Dataset({\n", - "[Pod fine-tune-bert-train-api-master-0]: features: ['label', 'text'],\n", - "[Pod fine-tune-bert-train-api-master-0]: num_rows: 3000\n", - "[Pod fine-tune-bert-train-api-master-0]: })\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:48Z INFO ----------------------------------------\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:48Z INFO Tokenize dataset\n", - "Map: 100%|██████████| 3000/3000 [00:01<00:00, 2759.84 examples/s]\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:51Z INFO Evaluation dataset is not found\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:51Z INFO Distributed dataset across PyTorchJob workers. 
WORLD_SIZE: 1, RANK: 0\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:51Z INFO Setup LoRA config for model\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:45:51Z INFO Start model training\n", - "[Pod fine-tune-bert-train-api-master-0]: /usr/local/lib/python3.10/dist-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", - "[Pod fine-tune-bert-train-api-master-0]: dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)\n", - "[Pod fine-tune-bert-train-api-master-0]: warnings.warn(\n", - "[Pod fine-tune-bert-train-api-master-0]: The following columns in the training set don't have a corresponding argument in `PeftModel.forward` and have been ignored: text. If text are not expected by `PeftModel.forward`, you can safely ignore this message.\n", - "[Pod fine-tune-bert-train-api-master-0]: ***** Running training *****\n", - "[Pod fine-tune-bert-train-api-master-0]: Num examples = 3,000\n", - "[Pod fine-tune-bert-train-api-master-0]: Num Epochs = 3\n", - "[Pod fine-tune-bert-train-api-master-0]: Instantaneous batch size per device = 8\n", - "[Pod fine-tune-bert-train-api-master-0]: Total train batch size (w. parallel, distributed & accumulation) = 8\n", - "[Pod fine-tune-bert-train-api-master-0]: Gradient Accumulation steps = 1\n", - "[Pod fine-tune-bert-train-api-master-0]: Total optimization steps = 1,125\n", - "[Pod fine-tune-bert-train-api-master-0]: Number of trainable parameters = 294,912\n", - "[Pod fine-tune-bert-train-api-master-0]: [W reducer.cpp:1346] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. 
This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n", - "[Pod fine-tune-bert-train-api-master-0]: {'loss': 0.7481, 'learning_rate': 2.777777777777778e-05, 'epoch': 1.33}\n", - "[Pod fine-tune-bert-train-api-master-0]: {'loss': 0.9313, 'learning_rate': 5.555555555555556e-06, 'epoch': 2.67}\n", - "[Pod fine-tune-bert-train-api-master-0]: Training completed. Do not forget to share your model on huggingface.co/models =)\n", - "[Pod fine-tune-bert-train-api-master-0]: {'train_runtime': 234.849, 'train_samples_per_second': 38.322, 'train_steps_per_second': 4.79, 'train_loss': 0.8460628526475694, 'epoch': 3.0}\n", - "[Pod fine-tune-bert-train-api-master-0]: 2024-03-15T16:49:47Z INFO Training is complete\n" - ] - } - ], - "source": [ - "logs, _ = TrainingClient().get_job_logs(job_name_train_api, follow=True)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "execution": { - "iopub.execute_input": "2024-03-01T23:44:15.511173Z", - "iopub.status.busy": "2024-03-01T23:44:15.510932Z", - "iopub.status.idle": "2024-03-01T23:44:15.539921Z", - "shell.execute_reply": "2024-03-01T23:44:15.539352Z", - "shell.execute_reply.started": "2024-03-01T23:44:15.511155Z" - }, - "tags": [] - }, - "source": [ - "## Delete the PyTorchJobs\n", - "\n", - "You can delete the created PyTorchJobs." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "TrainingClient().delete_job(name=job_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "TrainingClient().delete_job(name=job_name_train_api)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.8" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/tensorflow/dist-mnist/Dockerfile b/examples/tensorflow/dist-mnist/Dockerfile deleted file mode 100644 index e7fd5b0563..0000000000 --- a/examples/tensorflow/dist-mnist/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -FROM tensorflow/tensorflow:2.17.0 - -# Using keras-2.17 because of bug on keras-3.4.1 -# which used by default by TF-2.17 (https://github.com/tensorflow/tensorflow/issues/72388) -ENV TF_USE_LEGACY_KERAS 1 -RUN pip install tf_keras - -ADD examples/tensorflow/dist-mnist/ /var/tf_dist_mnist -ADD examples/tensorflow/mnist_utils.py /var/tf_dist_mnist -ENTRYPOINT ["python", "/var/tf_dist_mnist/dist_mnist.py"] diff --git a/examples/tensorflow/dist-mnist/README.md b/examples/tensorflow/dist-mnist/README.md deleted file mode 100644 index 306df6c9b1..0000000000 --- a/examples/tensorflow/dist-mnist/README.md +++ /dev/null @@ -1,18 +0,0 @@ -### Distributed mnist model for e2e test - -This folder containers Dockerfile and distributed mnist model for e2e test. - -**Build Image** - -The default image name and tag is `kubeflow/tf-dist-mnist-test:1.0`. - -To build this image on x86_64: -```shell -docker build -f Dockerfile -t kubeflow/tf-dist-mnist-test:1.0 ./ -``` - -**Create TFJob YAML** - -``` -kubectl create -f ./tf_job_mnist.yaml -``` diff --git a/examples/tensorflow/dist-mnist/dist_mnist.py b/examples/tensorflow/dist-mnist/dist_mnist.py deleted file mode 100755 index a443a61dc3..0000000000 --- a/examples/tensorflow/dist-mnist/dist_mnist.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Distributed MNIST training and validation, with model replicas, using Parameter Server Strategy. - -A Sequential model with a Flatten layer, a Dense layer (128 ReLU units), -Dropout for regularization, and a final Dense layer with 10 softmax units for classification. -The parameters (weights and biases) are located on one parameter server (ps), while the ops -are executed on two worker nodes by default. The TF sessions also run on the -worker node. -This script can be run with multiple workers and parameter servers, with at least -one chief, one worker, and one parameter server. - -The coordination between the multiple worker invocations occurs due to -the definition of the parameters on the same ps devices. The parameter updates -from one worker is visible to all other workers. As such, the workers can -perform forward computation and gradient calculation in parallel, which -should lead to increased training speed for the simple model. 
-""" - -import argparse -import os -import time - -import mnist_utils as helper -import tensorflow as tf - -args = None - - -def init_parser(): - global args - parser = argparse.ArgumentParser() - parser.add_argument( - "--data_path", - type=str, - default="mnist.npz", - help="Path where to cache the dataset locally (relative to ~/.keras/datasets).", - ) - parser.add_argument( - "--dropout", - type=float, - default=0.9, - help="Keep probability for training dropout", - ) - parser.add_argument( - "--batch_size", type=int, default=100, help="Training batch size" - ) - parser.add_argument( - "--learning_rate", type=float, default=0.001, help="Learning rate" - ) - parser.add_argument( - "--epochs", type=int, default=5, help="Number of epochs for training" - ) - parser.add_argument( - "--fake_data", - nargs="?", - const=True, - type=bool, - default=False, - help="If true, uses fake data for unit testing.", - ) - args = parser.parse_args() - print(f"Run script with {args=}") - - -def main(): - # Set the environment variable to allow reporting worker and ps failure to the - # coordinator. This is a workaround and won't be necessary in the future. - os.environ["GRPC_FAIL_FAST"] = "use_caller" - - cluster_resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver() - - # Get the cluster specification - cluster_spec = cluster_resolver.cluster_spec() - - # Get the number of PS replicas (parameter servers) - if "ps" in cluster_spec.jobs: - num_ps = cluster_spec.num_tasks("ps") - print(f"Number of PS replicas: {num_ps}") - else: - raise Exception("No PS replicas found in the cluster configuration.") - - if cluster_resolver.task_type in ("worker", "ps"): - # Start a TensorFlow server and wait. - server = tf.distribute.Server( - cluster_spec, - job_name=cluster_resolver.task_type, - task_index=cluster_resolver.task_id, - protocol=cluster_resolver.rpc_layer or "grpc", - start=True, - ) - server.join() - else: - # Run the coordinator. 
- - # Configure ParameterServerStrategy - variable_partitioner = ( - tf.distribute.experimental.partitioners.MinSizePartitioner( - min_shard_bytes=(256 << 10), max_shards=num_ps - ) - ) - - strategy = tf.distribute.ParameterServerStrategy( - cluster_resolver, variable_partitioner=variable_partitioner - ) - - # Load and preprocess data - train_ds, test_ds = helper.load_data( - fake_data=args.fake_data, data_path=args.data_path, repeat=True - ) - train_ds = helper.preprocess(ds=train_ds, batch_size=args.batch_size) - test_ds = helper.preprocess(ds=test_ds, batch_size=args.batch_size) - - # Distribute training across workers - with strategy.scope(): - model = helper.build_model( - dropout=args.dropout, - learning_rate=args.learning_rate, - ) - - # Start training - time_begin = time.time() - print(f"Training begins @ {time.ctime(time_begin)}") - - model.fit( - train_ds, - batch_size=args.batch_size, - epochs=args.epochs, - steps_per_epoch=6000 // args.batch_size * 2, - ) - - time_end = time.time() - print(f"Training ends @ {time.ctime(time_end)}") - training_time = time_end - time_begin - print(f"Training elapsed time: {training_time} s") - - # Validation - coordinator = tf.distribute.coordinator.ClusterCoordinator(strategy) - with strategy.scope(): - eval_accuracy = tf.keras.metrics.Accuracy() - - @tf.function - def eval_step(iterator): - """ - Perform an evaluation step across replicas. - - Args: - iterator: An iterator for the evaluation dataset. 
- """ - - def replica_fn(batch_data, labels): - # Generates output predictions - pred = model(batch_data, training=False) - # Get the predicted class by taking the argmax over the class probabilities (axis=1) - predicted_class = tf.argmax(pred, axis=1, output_type=tf.int64) - eval_accuracy.update_state(labels, predicted_class) - - batch_data, labels = next(iterator) - # Run the function on all workers using strategy.run - strategy.run(replica_fn, args=(batch_data, labels)) - - # Prepare the per-worker evaluation dataset and iterator - per_worker_eval_dataset = coordinator.create_per_worker_dataset(test_ds) - per_worker_eval_iterator = iter(per_worker_eval_dataset) - - # Calculate evaluation steps per epoch (e.g., based on dataset size and batch size) - eval_steps_per_epoch = 10000 // args.batch_size * 2 - - # Loop through the evaluation steps, scheduling them across the workers - for _ in range(eval_steps_per_epoch): - coordinator.schedule(eval_step, args=(per_worker_eval_iterator,)) - - # Wait for all scheduled evaluation steps to complete - coordinator.join() - - # Print the evaluation result (accuracy) - print("Evaluation accuracy: %f" % eval_accuracy.result()) - - -if __name__ == "__main__": - init_parser() - main() diff --git a/examples/tensorflow/dist-mnist/tf_job_mnist.yaml b/examples/tensorflow/dist-mnist/tf_job_mnist.yaml deleted file mode 100644 index c97d03b700..0000000000 --- a/examples/tensorflow/dist-mnist/tf_job_mnist.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: "TFJob" -metadata: - name: "dist-mnist-for-e2e-test" -spec: - tfReplicaSpecs: - Chief: - replicas: 1 - restartPolicy: Never - template: - spec: - containers: - - name: tensorflow - image: kubeflow/tf-dist-mnist-test:latest - - PS: - replicas: 1 - restartPolicy: Never - template: - spec: - containers: - - name: tensorflow - image: kubeflow/tf-dist-mnist-test:latest - - Worker: - replicas: 2 - restartPolicy: Never - template: - spec: - containers: - - name: 
tensorflow - image: kubeflow/tf-dist-mnist-test:latest diff --git a/examples/tensorflow/distribution_strategy/Dockerfile b/examples/tensorflow/distribution_strategy/Dockerfile deleted file mode 100644 index 8f7cc4e93b..0000000000 --- a/examples/tensorflow/distribution_strategy/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.9 - -RUN apt-get update && \ - apt-get install -y libhdf5-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install tensorflow==2.11.0 tensorflow_datasets==4.7.0 - -COPY examples/tensorflow/distribution_strategy/multi_worker_strategy-with-keras.py / -ENTRYPOINT ["python", "/multi_worker_strategy-with-keras.py", "--saved_model_dir", "/train/saved_model/", "--checkpoint_dir", "/train/checkpoint"] diff --git a/examples/tensorflow/distribution_strategy/README.md b/examples/tensorflow/distribution_strategy/README.md deleted file mode 100644 index 25d1ddbf09..0000000000 --- a/examples/tensorflow/distribution_strategy/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Multi-worker training with Keras - -This directory contains a example for running multi-worker distributed training -using Tensorflow 2.1 keras API on Kubeflow. For more information about the -source code, please see TensorFlow tutorials [here](https://www.tensorflow.org/tutorials/distribute/keras) and [here](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) - -## Prerequisite - -Your cluster must be configured to use Multiple GPUs, -please follow the [instructions](https://www.kubeflow.org/docs/components/training/tftraining/#using-gpus) - -## Steps - -1. Build a image - ``` - docker build -f Dockerfile -t kubeflow/multi_worker_strategy:v1.0 . - ``` - -2. Specify your storageClassName and create a persistent volume claim to save - models and checkpoints - ``` - kubectl -n ${NAMESPACE} create -f pvc.yaml - ``` - -3. 
Create a TFJob, if you use some GPUs other than NVIDIA, please replace - `nvidia.com/gpu` with your GPU vendor in the `limits` section. - ``` - kubectl -n ${NAMESPACE} create -f multi_worker_tfjob.yaml - ``` diff --git a/examples/tensorflow/distribution_strategy/multi_worker_strategy-with-keras.py b/examples/tensorflow/distribution_strategy/multi_worker_strategy-with-keras.py deleted file mode 100644 index 5326faf8d5..0000000000 --- a/examples/tensorflow/distribution_strategy/multi_worker_strategy-with-keras.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2020 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""An example of multi-worker training with Keras model using Strategy API.""" - -from __future__ import absolute_import, division, print_function - -import argparse -import json -import os - -import tensorflow as tf -import tensorflow_datasets as tfds -from tensorflow.keras import layers, models - - -def make_datasets_unbatched(): - BUFFER_SIZE = 10000 - - # Scaling MNIST data from (0, 255] to (0., 1.] 
- def scale(image, label): - image = tf.cast(image, tf.float32) - image /= 255 - return image, label - - datasets, _ = tfds.load(name="mnist", with_info=True, as_supervised=True) - - return datasets["train"].map(scale).cache().shuffle(BUFFER_SIZE) - - -def build_and_compile_cnn_model(): - model = models.Sequential() - model.add(layers.Conv2D(32, (3, 3), activation="relu", input_shape=(28, 28, 1))) - model.add(layers.MaxPooling2D((2, 2))) - model.add(layers.Conv2D(64, (3, 3), activation="relu")) - model.add(layers.MaxPooling2D((2, 2))) - model.add(layers.Conv2D(64, (3, 3), activation="relu")) - model.add(layers.Flatten()) - model.add(layers.Dense(64, activation="relu")) - model.add(layers.Dense(10, activation="softmax")) - - model.summary() - - model.compile( - optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"] - ) - - return model - - -def decay(epoch): - if epoch < 3: # pylint: disable=no-else-return - return 1e-3 - if 3 <= epoch < 7: - return 1e-4 - return 1e-5 - - -def main(args): - - # MultiWorkerMirroredStrategy creates copies of all variables in the model's - # layers on each device across all workers - # if your GPUs don't support NCCL, replace "communication" with another - strategy = tf.distribute.MultiWorkerMirroredStrategy( - communication_options=tf.distribute.experimental.CommunicationOptions( - implementation=tf.distribute.experimental.CollectiveCommunication.AUTO - ) - ) - - BATCH_SIZE_PER_REPLICA = 64 - BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync - - with strategy.scope(): - ds_train = make_datasets_unbatched().batch(BATCH_SIZE).repeat() - options = tf.data.Options() - options.experimental_distribute.auto_shard_policy = ( - tf.data.experimental.AutoShardPolicy.DATA - ) - ds_train = ds_train.with_options(options) - # Model building/compiling need to be within `strategy.scope()`. 
- multi_worker_model = build_and_compile_cnn_model() - - # Define the checkpoint directory to store the checkpoints - checkpoint_dir = args.checkpoint_dir - - # Name of the checkpoint files - checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}") - - # Function for decaying the learning rate. - # You can define any decay function you need. - # Callback for printing the LR at the end of each epoch. - class PrintLR(tf.keras.callbacks.Callback): - - def on_epoch_end(self, epoch, logs=None): # pylint: disable=no-self-use - print( - "\nLearning rate for epoch {} is {}".format( - epoch + 1, multi_worker_model.optimizer.lr.numpy() - ) - ) - - callbacks = [ - tf.keras.callbacks.TensorBoard(log_dir="./logs"), - tf.keras.callbacks.ModelCheckpoint( - filepath=checkpoint_prefix, save_weights_only=True - ), - tf.keras.callbacks.LearningRateScheduler(decay), - PrintLR(), - ] - - # Keras' `model.fit()` trains the model with specified number of epochs and - # number of steps per epoch. Note that the numbers here are for demonstration - # purposes only and may not sufficiently produce a model with good quality. - multi_worker_model.fit(ds_train, epochs=10, steps_per_epoch=70, callbacks=callbacks) - - # Saving a model - # Let `is_chief` be a utility function that inspects the cluster spec and - # current task type and returns True if the worker is the chief and False - # otherwise. - def is_chief(): - return TASK_INDEX == 0 - - if is_chief(): - model_path = args.saved_model_dir - - else: - # Save to a path that is unique across workers. 
- model_path = args.saved_model_dir + "/worker_tmp_" + str(TASK_INDEX) - - multi_worker_model.save(model_path) - - -if __name__ == "__main__": - os.environ["NCCL_DEBUG"] = "INFO" - - tfds.disable_progress_bar() - - # to decide if a worker is chief, get TASK_INDEX in Cluster info - tf_config = json.loads(os.environ.get("TF_CONFIG") or "{}") - TASK_INDEX = tf_config["task"]["index"] - - parser = argparse.ArgumentParser() - parser.add_argument( - "--saved_model_dir", - type=str, - required=True, - help="Tensorflow export directory.", - ) - - parser.add_argument( - "--checkpoint_dir", - type=str, - required=True, - help="Tensorflow checkpoint directory.", - ) - - parsed_args = parser.parse_args() - main(parsed_args) diff --git a/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml b/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml deleted file mode 100644 index b9500f2d5e..0000000000 --- a/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml +++ /dev/null @@ -1,26 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: TFJob -metadata: - name: multi-worker -spec: - runPolicy: - cleanPodPolicy: None - tfReplicaSpecs: - Worker: - replicas: 2 - restartPolicy: Never - template: - spec: - containers: - - name: tensorflow - image: kubeflow/tf-multi-worker-strategy:latest - volumeMounts: - - mountPath: /train - name: training - resources: - limits: - nvidia.com/gpu: 1 - volumes: - - name: training - persistentVolumeClaim: - claimName: strategy-volume diff --git a/examples/tensorflow/distribution_strategy/pvc.yaml b/examples/tensorflow/distribution_strategy/pvc.yaml deleted file mode 100644 index 0036c1fb45..0000000000 --- a/examples/tensorflow/distribution_strategy/pvc.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: strategy-volume - labels: - app: strategy-volume -spec: - storageClassName: "Your storageClassName" - accessModes: - - ReadWriteMany - resources: - requests: - storage: 10Gi diff 
--git a/examples/tensorflow/image-classification/create-tfjob.ipynb b/examples/tensorflow/image-classification/create-tfjob.ipynb deleted file mode 100644 index 182e977ea4..0000000000 --- a/examples/tensorflow/image-classification/create-tfjob.ipynb +++ /dev/null @@ -1,405 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# Create TFJob using Kubeflow Training SDK" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "This is a sample for Kubeflow Training SDK `kubeflow-training`.\n", - "\n", - "The notebook shows how to use Kubeflow TFJob SDK to create, get, wait, check and delete TFJob." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Install Kubeflow Training Python SDKs\n", - "\n", - "You need to install Kubeflow Training SDK to run this Notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO (andreyvelich): Change to release version when SDK with the new APIs is published.\n", - "!pip install git+https://github.com/kubeflow/training-operator.git#subdirectory=sdk/python" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "from kubernetes.client import V1PodTemplateSpec\n", - "from kubernetes.client import V1ObjectMeta\n", - "from kubernetes.client import V1PodSpec\n", - "from kubernetes.client import V1Container\n", - "\n", - "\n", - "from kubeflow.training import KubeflowOrgV1ReplicaSpec\n", - "from kubeflow.training import KubeflowOrgV1TFJob\n", - "from kubeflow.training import KubeflowOrgV1TFJobSpec\n", - "from kubeflow.training import KubeflowOrgV1RunPolicy\n", - "from kubeflow.training import TrainingClient\n", - "\n", - "from kubeflow.training import constants" - ] - }, - { - 
"cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Define TFJob" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "The demo runs Tensorflow MNIST example with 2 workers, chief, and parameter server for TFJob." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "name = \"mnist\"\n", - "namespace = \"kubeflow-user-example-com\"\n", - "container_name = \"tensorflow\"\n", - "\n", - "container = V1Container(\n", - " name=container_name,\n", - " image=\"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0\",\n", - " command=[\n", - " \"python\",\n", - " \"/var/tf_mnist/mnist_with_summaries.py\",\n", - " \"--log_dir=/train/logs\", \"--learning_rate=0.01\",\n", - " \"--batch_size=150\"\n", - " ]\n", - ")\n", - "\n", - "worker = KubeflowOrgV1ReplicaSpec(\n", - " replicas=2,\n", - " restart_policy=\"Never\",\n", - " template=V1PodTemplateSpec(\n", - " spec=V1PodSpec(\n", - " containers=[container]\n", - " )\n", - " )\n", - ")\n", - "\n", - "chief = KubeflowOrgV1ReplicaSpec(\n", - " replicas=1,\n", - " restart_policy=\"Never\",\n", - " template=V1PodTemplateSpec(\n", - " spec=V1PodSpec(\n", - " containers=[container]\n", - " )\n", - " )\n", - ")\n", - "\n", - "ps = KubeflowOrgV1ReplicaSpec(\n", - " replicas=1,\n", - " restart_policy=\"Never\",\n", - " template=V1PodTemplateSpec(\n", - " spec=V1PodSpec(\n", - " containers=[container]\n", - " )\n", - " )\n", - ")\n", - "\n", - "tfjob = KubeflowOrgV1TFJob(\n", - " api_version=constants.API_VERSION,\n", - " kind=constants.TFJOB_KIND,\n", - " metadata=V1ObjectMeta(name=\"mnist\",namespace=namespace),\n", - " spec=KubeflowOrgV1TFJobSpec(\n", - " run_policy=KubeflowOrgV1RunPolicy(clean_pod_policy=\"None\"),\n", - " tf_replica_specs={\"Worker\": worker,\n", - " \"Chief\": chief,\n", - " \"PS\": ps}\n", - " )\n", - ")" - 
] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create TFJob\n", - "\n", - "You have to create Training Client to deploy your TFJob in you cluster." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "TFJob kubeflow-user-example-com/mnist has been created\n" - ] - } - ], - "source": [ - "# Namespace and Job kind will be reused in every APIs.\n", - "training_client = TrainingClient(namespace=namespace, job_kind=constants.TFJOB_KIND)\n", - "training_client.create_job(tfjob)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Get the Created TFJob\n", - "\n", - "You can verify the created TFJob status." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'completion_time': None,\n", - " 'conditions': [{'last_transition_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc()),\n", - " 'message': 'TFJob mnist is created.',\n", - " 'reason': 'TFJobCreated',\n", - " 'status': 'True',\n", - " 'type': 'Created'},\n", - " {'last_transition_time': datetime.datetime(2023, 9, 8, 21, 42, 35, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 42, 35, tzinfo=tzutc()),\n", - " 'message': 'TFJob kubeflow-user-example-com/mnist is running.',\n", - " 'reason': 'TFJobRunning',\n", - " 'status': 'True',\n", - " 'type': 'Running'}],\n", - " 'last_reconcile_time': None,\n", - " 'replica_statuses': {'Chief': {'active': 1,\n", - " 'failed': None,\n", - " 'label_selector': None,\n", - " 'selector': None,\n", - " 'succeeded': None},\n", - " 'PS': {'active': 1,\n", - " 'failed': None,\n", - " 'label_selector': None,\n", - " 'selector': None,\n", - " 
'succeeded': None},\n", - " 'Worker': {'active': 2,\n", - " 'failed': None,\n", - " 'label_selector': None,\n", - " 'selector': None,\n", - " 'succeeded': None}},\n", - " 'start_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc())}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "training_client.get_job(name).status" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Get the TFJob Conditions" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'last_transition_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 42, 34, tzinfo=tzutc()),\n", - " 'message': 'TFJob mnist is created.',\n", - " 'reason': 'TFJobCreated',\n", - " 'status': 'True',\n", - " 'type': 'Created'},\n", - " {'last_transition_time': datetime.datetime(2023, 9, 8, 21, 42, 35, tzinfo=tzutc()),\n", - " 'last_update_time': datetime.datetime(2023, 9, 8, 21, 42, 35, tzinfo=tzutc()),\n", - " 'message': 'TFJob kubeflow-user-example-com/mnist is running.',\n", - " 'reason': 'TFJobRunning',\n", - " 'status': 'True',\n", - " 'type': 'Running'}]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "training_client.get_job_conditions(name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Wait Until TFJob Finishes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "training_client.wait_for_job_conditions(name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Verify if TFJob is Succeeded" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - 
}, - "scrolled": true - }, - "outputs": [], - "source": [ - "training_client.is_job_succeeded(name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Get the TFJob Training Logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "training_client.get_job_logs(name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Delete the TFJob" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "training_client.delete_job(name)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/tensorflow/mnist_utils.py b/examples/tensorflow/mnist_utils.py deleted file mode 100644 index 5cd436e376..0000000000 --- a/examples/tensorflow/mnist_utils.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2024 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -""" -Utility functions for loading, preprocessing, and building models for MNIST data. - -This module provides functions to load the MNIST dataset, preprocess it into TensorFlow datasets, -and build a simple neural network model using TensorFlow's Keras API. -""" - -import numpy as np -import tensorflow as tf -from tensorflow.keras.datasets import mnist - - -def load_data(fake_data=False, data_path=None, repeat=False): - """ - Loads the MNIST dataset and converts it into TensorFlow datasets. - - Args: - fake_data (bool, optional): If `True`, loads a fake dataset for testing purposes. - If `False`, loads the real MNIST dataset. - data_path (str, optional): Path where to cache the dataset locally. - If `None`, the dataset is loaded to the default location. - repeat (bool, optional): If `True`, makes the dataset repeat indefinitely. - - Returns: - train_ds (tf.data.Dataset): Dataset containing the training data (images and labels). - test_ds (tf.data.Dataset): Dataset containing the test data (images and labels). 
- """ - if fake_data: - (x_train, y_train), (x_test, y_test) = load_fake_data() - else: - (x_train, y_train), (x_test, y_test) = ( - mnist.load_data(path=data_path) if data_path else mnist.load_data() - ) - # Create TensorFlow datasets from the NumPy arrays - train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)) - if repeat: - return train_ds.repeat(), test_ds.repeat() - return train_ds, test_ds - - -def load_fake_data(): - x_train = np.random.randint(0, 256, (60000, 28, 28)).astype(np.uint8) - y_train = np.random.randint(0, 10, (60000,)).astype(np.uint8) - x_test = np.random.randint(0, 256, (10000, 28, 28)).astype(np.uint8) - y_test = np.random.randint(0, 10, (10000,)).astype(np.uint8) - - return (x_train, y_train), (x_test, y_test) - - -def build_model(dropout=0.9, learning_rate=0.001): - """ - Builds a simple neural network model using Keras Sequential API. - - Args: - dropout (float, optional): Keep probability for training dropout. - learning_rate (float, optional): The learning rate for the Adam optimizer. - - Returns: - model (tf.keras.Model): The compiled Keras model. 
- """ - model = tf.keras.Sequential( - [ - tf.keras.layers.Input( - shape=(28, 28, 1) - ), # Input layer with the shape of MNIST images - tf.keras.layers.Flatten(), - tf.keras.layers.Dense( - 128, activation="relu" - ), # Dense layer with 128 neurons and ReLU activation - tf.keras.layers.Dropout( - 1 - dropout - ), # Dropout layer to prevent overfitting - tf.keras.layers.Dense( - 10, activation="softmax" - ), # Output layer with 10 neurons (one for each class) - ] - ) - # Define an optimizer with a specific learning rate - optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate) - # Compile the model with Adam optimizer and sparse categorical crossentropy loss - model.compile( - optimizer=optimizer, - loss="sparse_categorical_crossentropy", - metrics=["accuracy"], - ) - return model - - -def preprocess(ds, batch_size): - """ - Preprocesses the dataset by normalizing the images, shuffling, batching, and prefetching. - - Args: - ds (tf.data.Dataset): The dataset to preprocess (either training or testing data). - batch_size (int): The number of samples per batch of data. - - - Returns: - ds (tf.data.Dataset): The preprocessed dataset. - """ - - def normalize_img(image, label): - """ - Normalizes images by scaling pixel values from the range [0, 255] to [0, 1]. - - Args: - image (tf.Tensor): The image tensor. - label (tf.Tensor): The corresponding label tensor. - - Returns: - tuple: The normalized image and the corresponding label. - """ - image = tf.cast(image, tf.float32) / 255.0 - return image, label - - # Map the normalization function across the dataset - ds = ds.map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) - ds = ds.shuffle( - buffer_size=10000 - ) # Shuffle the dataset with a buffer size of 10,000 - ds = ds.batch(batch_size) # Batch the dataset - ds = ds.prefetch( - buffer_size=tf.data.experimental.AUTOTUNE - ) # Prefetch to improve performance. 
- return ds diff --git a/examples/tensorflow/mnist_with_summaries/Dockerfile b/examples/tensorflow/mnist_with_summaries/Dockerfile deleted file mode 100644 index 77a6232a36..0000000000 --- a/examples/tensorflow/mnist_with_summaries/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -FROM tensorflow/tensorflow:2.17.0 - -ADD examples/tensorflow/mnist_with_summaries/ /var/tf_mnist -ADD examples/tensorflow/mnist_utils.py /var/tf_mnist -ENTRYPOINT ["python", "/var/tf_mnist/mnist_with_summaries.py"] diff --git a/examples/tensorflow/mnist_with_summaries/README.md b/examples/tensorflow/mnist_with_summaries/README.md deleted file mode 100644 index f6b76dd631..0000000000 --- a/examples/tensorflow/mnist_with_summaries/README.md +++ /dev/null @@ -1,16 +0,0 @@ -### Simple mnist example with persistent volume - -This is a simple example using an MNIST model that outputs a TF summary. -The example also mounts a persistent volume for output, making it suitable -for integrating with other components like Katib. - -The source code is borrowed from TensorFlow tutorials [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py). - -To build this image on x86_64: -```shell -docker build -f Dockerfile -t kubeflow/tf-mnist-with-summaries:1.0 ./ -``` - -Usage: -1. 
Add the persistent volume and claim: `kubectl apply -f tfevent-volume/.` -1. Deploy the TFJob: `kubectl apply -f tf_job_mnist.yaml` diff --git a/examples/tensorflow/mnist_with_summaries/mnist_with_summaries.py b/examples/tensorflow/mnist_with_summaries/mnist_with_summaries.py deleted file mode 100644 index b2971538cb..0000000000 --- a/examples/tensorflow/mnist_with_summaries/mnist_with_summaries.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A simple MNIST classifier which displays summaries in TensorBoard. -This is an unimpressive MNIST model, but it is a good example of using -tf.name_scope to make a graph legible in the TensorBoard graph explorer, and of -naming summary tags so that they are grouped meaningfully in TensorBoard. -It demonstrates the functionality of every TensorBoard dashboard. -""" -import argparse -import os - -import mnist_utils as helper -import tensorflow as tf - -args = None - - -def init_parser(): - global args - parser = argparse.ArgumentParser() - parser.add_argument( - "--fake_data", - nargs="?", - const=True, - type=bool, - default=False, - help="If true, uses fake data for unit testing.", - ) - parser.add_argument( - "--epochs", type=int, default=5, help="Number of epochs for training." 
- ) - parser.add_argument( - "--learning_rate", type=float, default=0.001, help="Initial learning rate" - ) - parser.add_argument( - "--batch_size", type=int, default=64, help="Training batch size" - ) - parser.add_argument( - "--dropout", - type=float, - default=0.9, - help="Keep probability for training dropout.", - ) - parser.add_argument( - "--data_path", - type=str, - default="mnist.npz", - help="Path where to cache the dataset locally (relative to ~/.keras/datasets).", - ) - parser.add_argument( - "--log_dir", - type=str, - default=os.path.join( - os.getenv("TEST_TMPDIR", "/tmp"), - "tensorflow/mnist/logs/mnist_with_summaries", - ), - help="Summaries log directory", - ) - args = parser.parse_args() - print(f"Run script with {args=}") - - -def main(): - """ - The main function to load data, preprocess it, build the model, and train it. - """ - # Load and preprocess data - train_ds, test_ds = helper.load_data( - data_path=args.data_path, fake_data=args.fake_data - ) - train_ds = helper.preprocess(ds=train_ds, batch_size=args.batch_size) - test_ds = helper.preprocess(ds=test_ds, batch_size=args.batch_size) - - # Build model - model = helper.build_model(dropout=args.dropout, learning_rate=args.learning_rate) - - # Setup TensorBoard - tensorboard_callback = tf.keras.callbacks.TensorBoard( - log_dir=args.log_dir, histogram_freq=1 - ) - - # Train the model - model.fit( - train_ds, - epochs=args.epochs, - validation_data=test_ds, - callbacks=[tensorboard_callback], - ) - - -if __name__ == "__main__": - init_parser() - main() diff --git a/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml b/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml deleted file mode 100644 index 3c1884a05c..0000000000 --- a/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: "TFJob" -metadata: - name: "mnist" - namespace: kubeflow -spec: - runPolicy: - cleanPodPolicy: None - tfReplicaSpecs: - Worker: - 
replicas: 1 - restartPolicy: Never - template: - spec: - containers: - - name: tensorflow - image: kubeflow/tf-mnist-with-summaries:latest - command: - - "python" - - "/var/tf_mnist/mnist_with_summaries.py" - - "--log_dir=/train/logs" - - "--learning_rate=0.01" - - "--batch_size=150" - volumeMounts: - - mountPath: "/train" - name: "training" - volumes: - - name: "training" - persistentVolumeClaim: - claimName: "tfevent-volume" diff --git a/examples/tensorflow/mnist_with_summaries/tfevent-volume/tfevent-pv.yaml b/examples/tensorflow/mnist_with_summaries/tfevent-volume/tfevent-pv.yaml deleted file mode 100644 index cf41c6f982..0000000000 --- a/examples/tensorflow/mnist_with_summaries/tfevent-volume/tfevent-pv.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: - name: tfevent-volume - labels: - type: local - app: tfjob -spec: - capacity: - storage: 10Gi - storageClassName: standard - accessModes: - - ReadWriteMany - hostPath: - path: /tmp/data diff --git a/examples/tensorflow/mnist_with_summaries/tfevent-volume/tfevent-pvc.yaml b/examples/tensorflow/mnist_with_summaries/tfevent-volume/tfevent-pvc.yaml deleted file mode 100644 index 6bab17d803..0000000000 --- a/examples/tensorflow/mnist_with_summaries/tfevent-volume/tfevent-pvc.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: tfevent-volume - namespace: kubeflow - labels: - type: local - app: tfjob -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 10Gi diff --git a/examples/tensorflow/simple.yaml b/examples/tensorflow/simple.yaml deleted file mode 100644 index 4c2a0a76e6..0000000000 --- a/examples/tensorflow/simple.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: TFJob -metadata: - name: tfjob-simple - namespace: kubeflow -spec: - tfReplicaSpecs: - Worker: - replicas: 2 - restartPolicy: OnFailure - template: - spec: - containers: - - name: tensorflow - image: 
kubeflow/tf-mnist-with-summaries:latest - command: - - "python" - - "/var/tf_mnist/mnist_with_summaries.py" diff --git a/examples/tensorflow/tf_sample/Dockerfile b/examples/tensorflow/tf_sample/Dockerfile deleted file mode 100644 index 5ce15848f5..0000000000 --- a/examples/tensorflow/tf_sample/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM tensorflow/tensorflow:2.17.0 -RUN pip install retrying -RUN mkdir -p /opt/kubeflow -COPY examples/tensorflow/tf_sample/tf_smoke.py /opt/kubeflow/ -ENTRYPOINT ["python", "/opt/kubeflow/tf_smoke.py"] diff --git a/examples/tensorflow/tf_sample/Makefile b/examples/tensorflow/tf_sample/Makefile deleted file mode 100644 index 5055001182..0000000000 --- a/examples/tensorflow/tf_sample/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -IMG = gcr.io/kubeflow-examples/tf_smoke - -# List any changed files. We only include files in the notebooks directory. -# because that is the code in the docker image. -# In particular we exclude changes to the ksonnet configs. -CHANGED_FILES := $(shell git diff-files --relative=tensorflow/tf_sample) - -ifeq ($(strip $(CHANGED_FILES)),) -# Changed files is empty; not dirty -# Don't include --dirty because it could be dirty if files outside the ones we care -# about changed. -GIT_VERSION := $(shell git describe --always) -else -GIT_VERSION := $(shell git describe --always)-dirty-$(shell git diff | shasum -a256 | cut -c -6) -endif - -TAG := $(shell date +v%Y%m%d)-$(GIT_VERSION) -all: build - -# To build without the cache set the environment variable -# export DOCKER_BUILD_OPTS=--no-cache -build: - docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) . \ - --label=git-verions=$(GIT_VERSION) - docker tag $(IMG):$(TAG) $(IMG):latest - @echo Built $(IMG):latest - @echo Built $(IMG):$(TAG) - - -# Build but don't attach the latest tag. This allows manual testing/inspection of the image -# first. 
-push: build - gcloud docker -- push $(IMG):$(TAG) - @echo Pushed $(IMG) with :$(TAG) tags - -push-latest: push - gcloud container images add-tag --quiet $(IMG):$(TAG) $(IMG):latest --verbosity=info - echo created $(IMG):latest diff --git a/examples/tensorflow/tf_sample/setup.py b/examples/tensorflow/tf_sample/setup.py deleted file mode 100644 index 355838a2aa..0000000000 --- a/examples/tensorflow/tf_sample/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""A setup.py file for the tf_sample package.""" -from setuptools import find_packages, setup - -REQUIRED_PACKAGES = [] - -setup( - name="tf_sample", - version="0.1.1", - author="Jeremy Lewi", - author_email="jlewi@google.com", - install_requires=REQUIRED_PACKAGES, - packages=find_packages(), - description="Sample TF program", - requires=[], -) diff --git a/examples/tensorflow/tf_sample/tf_smoke.py b/examples/tensorflow/tf_sample/tf_smoke.py deleted file mode 100644 index 2fc5c3899d..0000000000 --- a/examples/tensorflow/tf_sample/tf_smoke.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -Run a distributed TensorFlow program using -MultiWorkerMirroredStrategy to verify we can execute ops. - -The program does a simple matrix multiplication. - -With MultiWorkerMirroredStrategy, the operations are distributed across multiple workers, -and each worker performs the matrix multiplication. The strategy handles the distribution -of operations and aggregation of results. 
- -This way we can verify that distributed training is working by executing ops on all devices. -""" - -import argparse -import time - -import numpy as np -import retrying -import tensorflow as tf - -# Set up the MultiWorkerMirroredStrategy to distribute computation across multiple workers. -strategy = tf.distribute.MultiWorkerMirroredStrategy() - - -def parse_args(): - """Parse the command line arguments.""" - parser = argparse.ArgumentParser() - - parser.add_argument( - "--sleep_secs", default=0, type=int, help="Amount of time to sleep at the end" - ) - - # TODO(jlewi): We ignore unknown arguments because the backend is currently - # setting some flags to empty values like metadata path. - args, _ = parser.parse_known_args() - return args - - -# Add retries to deal with things like gRPC errors that result in -# UnavailableError. -@retrying.retry( - wait_exponential_multiplier=1000, - wait_exponential_max=10000, - stop_max_delay=60 * 3 * 1000, -) -def matrix_multiplication_fn(): - """ - Perform matrix multiplication on two example matrices using TensorFlow. - - Returns: - tf.Tensor: The result of the matrix multiplication. - """ - width = 10 - height = 10 - a = np.arange(width * height).reshape(height, width).astype(np.float32) - b = np.arange(width * height).reshape(height, width).astype(np.float32) - - # Perform matrix multiplication - c = tf.matmul(a, b) - tf.print(f"Result for this device: {c}") - - return c - - -def run(): - """ - Run the distributed matrix multiplication operation across multiple devices. 
- """ - with strategy.scope(): - tf.print(f"Number of devices: {strategy.num_replicas_in_sync}") - - result = strategy.run(matrix_multiplication_fn) - - # Reduce results across devices to get a single result - reduced_result = strategy.reduce(tf.distribute.ReduceOp.SUM, result, axis=None) - tf.print( - "Summed result of matrix multiplication across all devices:", reduced_result - ) - - -if __name__ == "__main__": - args = parse_args() - - # Execute the distributed matrix multiplication. - run() - if args.sleep_secs: - print(f"Sleeping for {args.sleep_secs} seconds") - time.sleep(args.sleep_secs) diff --git a/examples/xgboost/lightgbm-dist/Dockerfile b/examples/xgboost/lightgbm-dist/Dockerfile deleted file mode 100644 index b05c4020fb..0000000000 --- a/examples/xgboost/lightgbm-dist/Dockerfile +++ /dev/null @@ -1,47 +0,0 @@ -# inspired from https://github.com/microsoft/LightGBM/blob/v4.1.0/docker/dockerfile-cli -FROM ubuntu:20.04 as builder - -ENV \ - DEBIAN_FRONTEND=noninteractive \ - LANG=C.UTF-8 \ - LC_ALL=C.UTF-8 - -RUN apt-get update -y && \ - apt-get install -y --no-install-recommends \ - ca-certificates \ - cmake \ - build-essential \ - gcc \ - g++ \ - git \ - libomp-dev && \ - rm -rf /var/lib/apt/lists/* - -RUN git clone \ - --recursive \ - --branch v4.1.0 \ - --depth 1 \ - https://github.com/Microsoft/LightGBM && \ - mkdir LightGBM/build && \ - cd LightGBM/build && \ - cmake .. && \ - make -j4 && \ - make install && \ - cd "${HOME}" && \ - rm -rf LightGBM - -FROM python:3.7 - -COPY requirements.txt . -RUN pip install -r requirements.txt -COPY --from=builder /usr/local/bin/lightgbm /usr/local/bin/lightgbm - -WORKDIR /app - -# Download the example data -RUN mkdir data -ADD https://raw.githubusercontent.com/microsoft/LightGBM/stable/examples/parallel_learning/binary.train data/. -ADD https://raw.githubusercontent.com/microsoft/LightGBM/stable/examples/parallel_learning/binary.test data/. 
-COPY *.py ./ - -ENTRYPOINT [ "python", "/app/main.py" ] diff --git a/examples/xgboost/lightgbm-dist/README.md b/examples/xgboost/lightgbm-dist/README.md deleted file mode 100644 index 616425f3cf..0000000000 --- a/examples/xgboost/lightgbm-dist/README.md +++ /dev/null @@ -1,203 +0,0 @@ -### Distributed Lightgbm Job train - -This folder containers Dockerfile and Python scripts to run a distributed Lightgbm training using the XGBoost operator. -The code is based in this [example](https://github.com/microsoft/LightGBM/tree/master/examples/parallel_learning) in the official github repository of the library. - - -**Start the training** - -``` -kubectl create -f xgboostjob_v1_lightgbm_dist_training.yaml -``` - -**Look at the job status** -``` - kubectl get -o yaml XGBoostJob/lightgbm-dist-train-test - ``` -Here is sample output when the job is running. The output result like this - -``` -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - annotations: - kubectl.kubernetes.io/last-applied-configuration: | - 
{"apiVersion":"xgboostjob.kubeflow.org/v1","kind":"XGBoostJob","metadata":{"annotations":{},"name":"lightgbm-dist-train-test","namespace":"default"},"spec":{"xgbReplicaSpecs":{"Master":{"replicas":1,"restartPolicy":"Never","template":{"apiVersion":"v1","kind":"Pod","spec":{"containers":[{"args":["--job_type=Train","--boosting_type=gbdt","--objective=binary","--metric=binary_logloss,auc","--metric_freq=1","--is_training_metric=true","--max_bin=255","--data=data/binary.train","--valid_data=data/binary.test","--num_trees=100","--learning_rate=01","--num_leaves=63","--tree_learner=feature","--feature_fraction=0.8","--bagging_freq=5","--bagging_fraction=0.8","--min_data_in_leaf=50","--min_sum_hessian_in_leaf=50","--is_enable_sparse=true","--use_two_round_loading=false","--is_save_binary_file=false"],"image":"kubeflow/lightgbm-dist-py-test:1.0","imagePullPolicy":"Never","name":"xgboostjob","ports":[{"containerPort":9991,"name":"xgboostjob-port"}]}]}}},"Worker":{"replicas":2,"restartPolicy":"ExitCode","template":{"apiVersion":"v1","kind":"Pod","spec":{"containers":[{"args":["--job_type=Train","--boosting_type=gbdt","--objective=binary","--metric=binary_logloss,auc","--metric_freq=1","--is_training_metric=true","--max_bin=255","--data=data/binary.train","--valid_data=data/binary.test","--num_trees=100","--learning_rate=01","--num_leaves=63","--tree_learner=feature","--feature_fraction=0.8","--bagging_freq=5","--bagging_fraction=0.8","--min_data_in_leaf=50","--min_sum_hessian_in_leaf=50","--is_enable_sparse=true","--use_two_round_loading=false","--is_save_binary_file=false"],"image":"kubeflow/lightgbm-dist-py-test:1.0","imagePullPolicy":"Never","name":"xgboostjob","ports":[{"containerPort":9991,"name":"xgboostjob-port"}]}]}}}}}} - creationTimestamp: "2020-10-14T15:31:23Z" - generation: 7 - managedFields: - - apiVersion: xgboostjob.kubeflow.org/v1 - fieldsType: FieldsV1 - fieldsV1: - f:metadata: - f:annotations: - .: {} - f:kubectl.kubernetes.io/last-applied-configuration: 
{} - f:spec: - .: {} - f:xgbReplicaSpecs: - .: {} - f:Master: - .: {} - f:replicas: {} - f:restartPolicy: {} - f:template: - .: {} - f:spec: {} - f:Worker: - .: {} - f:replicas: {} - f:restartPolicy: {} - f:template: - .: {} - f:spec: {} - manager: kubectl-client-side-apply - operation: Update - time: "2020-10-14T15:31:23Z" - - apiVersion: xgboostjob.kubeflow.org/v1 - fieldsType: FieldsV1 - fieldsV1: - f:spec: - f:RunPolicy: - .: {} - f:cleanPodPolicy: {} - f:xgbReplicaSpecs: - f:Master: - f:template: - f:metadata: - .: {} - f:creationTimestamp: {} - f:spec: - f:containers: {} - f:Worker: - f:template: - f:metadata: - .: {} - f:creationTimestamp: {} - f:spec: - f:containers: {} - f:status: - .: {} - f:completionTime: {} - f:conditions: {} - f:replicaStatuses: - .: {} - f:Master: - .: {} - f:succeeded: {} - f:Worker: - .: {} - f:succeeded: {} - manager: main - operation: Update - time: "2020-10-14T15:34:44Z" - name: lightgbm-dist-train-test - namespace: default - resourceVersion: "38923" - selfLink: /apis/xgboostjob.kubeflow.org/v1/namespaces/default/xgboostjobs/lightgbm-dist-train-test - uid: b2b887d0-445b-498b-8852-26c8edc98dc7 -spec: - RunPolicy: - cleanPodPolicy: None - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - metadata: - creationTimestamp: null - spec: - containers: - - args: - - --job_type=Train - - --boosting_type=gbdt - - --objective=binary - - --metric=binary_logloss,auc - - --metric_freq=1 - - --is_training_metric=true - - --max_bin=255 - - --data=data/binary.train - - --valid_data=data/binary.test - - --num_trees=100 - - --learning_rate=01 - - --num_leaves=63 - - --tree_learner=feature - - --feature_fraction=0.8 - - --bagging_freq=5 - - --bagging_fraction=0.8 - - --min_data_in_leaf=50 - - --min_sum_hessian_in_leaf=50 - - --is_enable_sparse=true - - --use_two_round_loading=false - - --is_save_binary_file=false - image: kubeflow/lightgbm-dist-py-test:1.0 - imagePullPolicy: Never - name: xgboostjob - ports: - - 
containerPort: 9991 - name: xgboostjob-port - resources: {} - Worker: - replicas: 2 - restartPolicy: ExitCode - template: - metadata: - creationTimestamp: null - spec: - containers: - - args: - - --job_type=Train - - --boosting_type=gbdt - - --objective=binary - - --metric=binary_logloss,auc - - --metric_freq=1 - - --is_training_metric=true - - --max_bin=255 - - --data=data/binary.train - - --valid_data=data/binary.test - - --num_trees=100 - - --learning_rate=01 - - --num_leaves=63 - - --tree_learner=feature - - --feature_fraction=0.8 - - --bagging_freq=5 - - --bagging_fraction=0.8 - - --min_data_in_leaf=50 - - --min_sum_hessian_in_leaf=50 - - --is_enable_sparse=true - - --use_two_round_loading=false - - --is_save_binary_file=false - image: kubeflow/lightgbm-dist-py-test:1.0 - imagePullPolicy: Never - name: xgboostjob - ports: - - containerPort: 9991 - name: xgboostjob-port - resources: {} -status: - completionTime: "2020-10-14T15:34:44Z" - conditions: - - lastTransitionTime: "2020-10-14T15:31:23Z" - lastUpdateTime: "2020-10-14T15:31:23Z" - message: xgboostJob lightgbm-dist-train-test is created. - reason: XGBoostJobCreated - status: "True" - type: Created - - lastTransitionTime: "2020-10-14T15:31:23Z" - lastUpdateTime: "2020-10-14T15:31:23Z" - message: XGBoostJob lightgbm-dist-train-test is running. - reason: XGBoostJobRunning - status: "False" - type: Running - - lastTransitionTime: "2020-10-14T15:34:44Z" - lastUpdateTime: "2020-10-14T15:34:44Z" - message: XGBoostJob lightgbm-dist-train-test is successfully completed. 
- reason: XGBoostJobSucceeded - status: "True" - type: Succeeded - replicaStatuses: - Master: - succeeded: 1 - Worker: - succeeded: 2 -``` diff --git a/examples/xgboost/lightgbm-dist/main.py b/examples/xgboost/lightgbm-dist/main.py deleted file mode 100644 index 25a96055f5..0000000000 --- a/examples/xgboost/lightgbm-dist/main.py +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging -import os - -from train import train -from utils import generate_machine_list_file, generate_train_conf_file - -logger = logging.getLogger(__name__) - - -def main(args, extra_args): - - master_addr = os.environ["MASTER_ADDR"] - master_port = os.environ["MASTER_PORT"] - worker_addrs = os.environ["WORKER_ADDRS"] - worker_port = os.environ["WORKER_PORT"] - world_size = int(os.environ["WORLD_SIZE"]) - rank = int(os.environ["RANK"]) - - logger.info( - "extract cluster info from env variables \n" - f"master_addr: {master_addr} \n" - f"master_port: {master_port} \n" - f"worker_addrs: {worker_addrs} \n" - f"worker_port: {worker_port} \n" - f"world_size: {world_size} \n" - f"rank: {rank} \n" - ) - - if args.job_type == "Predict": - logging.info("starting the predict job") - - elif args.job_type == "Train": - logging.info("starting the train job") - logging.info(f"extra args:\n {extra_args}") - machine_list_filepath = generate_machine_list_file( - master_addr, master_port, worker_addrs, worker_port - ) - logging.info(f"machine list generated 
in: {machine_list_filepath}") - local_port = worker_port if rank else master_port - config_file = generate_train_conf_file( - machine_list_file=machine_list_filepath, - world_size=world_size, - output_model="model.txt", - local_port=local_port, - extra_args=extra_args, - ) - logging.info(f"config generated in: {config_file}") - train(config_file) - logging.info("Finish distributed job") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--job_type", - help="Job type to execute", - choices=["Train", "Predict"], - required=True, - ) - - logging.basicConfig(format="%(message)s") - logging.getLogger().setLevel(logging.INFO) - args, extra_args = parser.parse_known_args() - main(args, extra_args) diff --git a/examples/xgboost/lightgbm-dist/requirements.txt b/examples/xgboost/lightgbm-dist/requirements.txt deleted file mode 100644 index 41e859c19e..0000000000 --- a/examples/xgboost/lightgbm-dist/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -numpy==1.20.3 -scipy==1.6.2 -scikit-learn==1.0.1 -pandas==1.3.0 diff --git a/examples/xgboost/lightgbm-dist/train.py b/examples/xgboost/lightgbm-dist/train.py deleted file mode 100644 index 578b6fc45e..0000000000 --- a/examples/xgboost/lightgbm-dist/train.py +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import logging -import subprocess - -logger = logging.getLogger(__name__) - - -def train(train_config_filepath: str): - cmd = ["lightgbm", f"config={train_config_filepath}"] - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) - line = proc.stdout.readline() - while line: - logger.info((line.decode("utf-8").strip())) - line = proc.stdout.readline() diff --git a/examples/xgboost/lightgbm-dist/utils.py b/examples/xgboost/lightgbm-dist/utils.py deleted file mode 100644 index 4115e721bc..0000000000 --- a/examples/xgboost/lightgbm-dist/utils.py +++ /dev/null @@ -1,91 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -import re -import socket -import tempfile -from time import sleep -from typing import List, Union - -logger = logging.getLogger(__name__) - - -def generate_machine_list_file( - master_addr: str, master_port: str, worker_addrs: str, worker_port: str -) -> str: - logger.info("starting to extract system env") - - filename = tempfile.NamedTemporaryFile(delete=False).name - - def _get_ips( - master_addr_name, - worker_addr_names, - max_retries=10, - sleep_secs=10, - current_retry=0, - ): - try: - worker_addr_ips = [] - master_addr_ip = socket.gethostbyname(master_addr_name) - - for addr in worker_addr_names.split(","): - worker_addr_ips.append(socket.gethostbyname(addr)) - - except socket.gaierror as ex: - if "Name or service not known" in str(ex) and current_retry < max_retries: - sleep(sleep_secs) - master_addr_ip, worker_addr_ips = _get_ips( - master_addr_name, - worker_addr_names, - max_retries=max_retries, - sleep_secs=sleep_secs, - current_retry=current_retry + 1, - ) - else: - raise ValueError("Couldn't get address names") - - return master_addr_ip, worker_addr_ips - - master_ip, worker_ips = _get_ips(master_addr, worker_addrs) - - with open(filename, "w") as file: - print(f"{master_ip} {master_port}", file=file) - for addr in worker_ips: - print(f"{addr} {worker_port}", file=file) - - return filename - - -def generate_train_conf_file( - machine_list_file: str, - world_size: int, - output_model: str, - local_port: Union[int, str], - extra_args: List[str], -) -> str: - - filename = tempfile.NamedTemporaryFile(delete=False).name - - with open(filename, "w") as file: - print("task = train", file=file) - print(f"output_model = {output_model}", file=file) - print(f"num_machines = {world_size}", file=file) - print(f"local_listen_port = {local_port}", file=file) - print(f"machine_list_file = {machine_list_file}", file=file) - for arg in extra_args: - m = re.match(r"--(.+)=([^\s]+)", arg) - if m is not None: - k, v = m.groups() - print(f"{k} = {v}", 
file=file) - - return filename diff --git a/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml b/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml deleted file mode 100644 index 35487b1511..0000000000 --- a/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml +++ /dev/null @@ -1,74 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - name: lightgbm-dist-train-test -spec: - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - spec: - containers: - - name: xgboost - image: kubeflow/lightgbm-dist-py-test:1.0 - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Never - args: - - --job_type=Train - - --boosting_type=gbdt - - --objective=binary - - --metric=binary_logloss,auc - - --metric_freq=1 - - --is_training_metric=true - - --max_bin=255 - - --data=data/binary.train - - --valid_data=data/binary.test - - --num_trees=100 - - --learning_rate=01 - - --num_leaves=63 - - --tree_learner=feature - - --feature_fraction=0.8 - - --bagging_freq=5 - - --bagging_fraction=0.8 - - --min_data_in_leaf=50 - - --min_sum_hessian_in_leaf=50 - - --is_enable_sparse=true - - --use_two_round_loading=false - - --is_save_binary_file=false - Worker: - replicas: 2 - restartPolicy: ExitCode - template: - spec: - containers: - - name: xgboost - image: kubeflow/lightgbm-dist-py-test:1.0 - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Never - args: - - --job_type=Train - - --boosting_type=gbdt - - --objective=binary - - --metric=binary_logloss,auc - - --metric_freq=1 - - --is_training_metric=true - - --max_bin=255 - - --data=data/binary.train - - --valid_data=data/binary.test - - --num_trees=100 - - --learning_rate=01 - - --num_leaves=63 - - --tree_learner=feature - - --feature_fraction=0.8 - - --bagging_freq=5 - - --bagging_fraction=0.8 - - --min_data_in_leaf=50 - - --min_sum_hessian_in_leaf=50 - - --is_enable_sparse=true - - 
--use_two_round_loading=false - - --is_save_binary_file=false diff --git a/examples/xgboost/smoke-dist/Dockerfile b/examples/xgboost/smoke-dist/Dockerfile deleted file mode 100644 index ab373d7f1b..0000000000 --- a/examples/xgboost/smoke-dist/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -# Install python 3.6 -FROM python:3.6 - -RUN mkdir -p /opt/mlkube - -# Download the rabit tracker and xgboost code. -COPY tracker.py /opt/mlkube/ -COPY xgboost_smoke_test.py /opt/mlkube/ -COPY requirements.txt /opt/mlkube/ - -# Install requirements -RUN pip install -r /opt/mlkube/requirements.txt - -ENTRYPOINT ["python", "/opt/mlkube/xgboost_smoke_test.py"] diff --git a/examples/xgboost/smoke-dist/README.md b/examples/xgboost/smoke-dist/README.md deleted file mode 100644 index c0ce8e6cc2..0000000000 --- a/examples/xgboost/smoke-dist/README.md +++ /dev/null @@ -1,87 +0,0 @@ -### Distributed send/recv e2e test for xgboost rabit - -This folder containers Dockerfile and distributed send/recv test. - - -**Start and test XGBoost Rabit tracker** - -``` -kubectl create -f xgboostjob_v1alpha1_rabit_test.yaml -``` - -**Look at the job status** -``` - kubectl get -o yaml XGBoostJob/xgboost-dist-test - ``` -Here is sample output when the job is running. 
The output result like this -``` -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - creationTimestamp: "2019-06-21T03:32:57Z" - generation: 7 - name: xgboost-dist-test - namespace: default - resourceVersion: "258466" - uid: 431dc182-93d5-11e9-bbab-080027dfbfe2 -spec: - RunPolicy: - cleanPodPolicy: None - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - metadata: - creationTimestamp: null - spec: - containers: - - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest - imagePullPolicy: Always - name: xgboostjob - ports: - - containerPort: 9991 - name: xgboostjob-port - resources: {} - Worker: - replicas: 2 - restartPolicy: Never - template: - metadata: - creationTimestamp: null - spec: - containers: - - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest - imagePullPolicy: Always - name: xgboostjob - ports: - - containerPort: 9991 - name: xgboostjob-port - resources: {} -status: - completionTime: "2019-06-21T03:33:03Z" - conditions: - - lastTransitionTime: "2019-06-21T03:32:57Z" - lastUpdateTime: "2019-06-21T03:32:57Z" - message: xgboostJob xgboost-dist-test is created. - reason: XGBoostJobCreated - status: "True" - type: Created - - lastTransitionTime: "2019-06-21T03:32:57Z" - lastUpdateTime: "2019-06-21T03:32:57Z" - message: XGBoostJob xgboost-dist-test is running. - reason: XGBoostJobRunning - status: "False" - type: Running - - lastTransitionTime: "2019-06-21T03:33:03Z" - lastUpdateTime: "2019-06-21T03:33:03Z" - message: XGBoostJob xgboost-dist-test is successfully completed. 
- reason: XGBoostJobSucceeded - status: "True" - type: Succeeded - replicaStatuses: - Master: - succeeded: 1 - Worker: - succeeded: 2 -``` diff --git a/examples/xgboost/smoke-dist/requirements.txt b/examples/xgboost/smoke-dist/requirements.txt deleted file mode 100644 index 524b87ab12..0000000000 --- a/examples/xgboost/smoke-dist/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -numpy>=1.16.3 -Cython>=0.29.4 -requests>=2.21.0 -urllib3>=1.21.1 -scipy>=1.4.1 -xgboost==1.5.2 diff --git a/examples/xgboost/smoke-dist/tracker.py b/examples/xgboost/smoke-dist/tracker.py deleted file mode 100644 index 9c9367a3b7..0000000000 --- a/examples/xgboost/smoke-dist/tracker.py +++ /dev/null @@ -1,527 +0,0 @@ -""" -Tracker script for DMLC -Implements the tracker control protocol - - start dmlc jobs - - start ps scheduler and rabit tracker - - help nodes to establish links with each other -Tianqi Chen --------------------------- -This was taken from -https://github.com/dmlc/dmlc-core/blob/master/tracker/dmlc_tracker/tracker.py -See LICENSE here -https://github.com/dmlc/dmlc-core/blob/master/LICENSE -No code modified or added except for this explanatory comment. 
-""" - -# pylint: disable=invalid-name, missing-docstring, too-many-arguments -# pylint: disable=too-many-locals -# pylint: disable=too-many-branches, too-many-statements -from __future__ import absolute_import - -import argparse -import logging -import os -import socket -import struct -import subprocess -import sys -import time -from threading import Thread - - -class ExSocket(object): - """ - Extension of socket to handle recv and send of special data - """ - - def __init__(self, sock): - self.sock = sock - - def recvall(self, nbytes): - res = [] - nread = 0 - while nread < nbytes: - chunk = self.sock.recv(min(nbytes - nread, 1024)) - nread += len(chunk) - res.append(chunk) - return b"".join(res) - - def recvint(self): - return struct.unpack("@i", self.recvall(4))[0] - - def sendint(self, n): - self.sock.sendall(struct.pack("@i", n)) - - def sendstr(self, s): - self.sendint(len(s)) - self.sock.sendall(s.encode()) - - def recvstr(self): - slen = self.recvint() - return self.recvall(slen).decode() - - -# magic number used to verify existence of data -kMagic = 0xFF99 - - -def get_some_ip(host): - return socket.getaddrinfo(host, None)[0][4][0] - - -def get_family(addr): - return socket.getaddrinfo(addr, None)[0][0] - - -class SlaveEntry(object): - def __init__(self, sock, s_addr): - slave = ExSocket(sock) - self.sock = slave - self.host = get_some_ip(s_addr[0]) - magic = slave.recvint() - assert magic == kMagic, "invalid magic number=%d from %s" % (magic, self.host) - slave.sendint(kMagic) - self.rank = slave.recvint() - self.world_size = slave.recvint() - self.jobid = slave.recvstr() - self.cmd = slave.recvstr() - self.wait_accept = 0 - self.port = None - - def decide_rank(self, job_map): - if self.rank >= 0: - return self.rank - if self.jobid != "NULL" and self.jobid in job_map: - return job_map[self.jobid] - return -1 - - def assign_rank(self, rank, wait_conn, tree_map, parent_map, ring_map): - self.rank = rank - nnset = set(tree_map[rank]) - rprev, rnext = 
ring_map[rank] - self.sock.sendint(rank) - # send parent rank - self.sock.sendint(parent_map[rank]) - # send world size - self.sock.sendint(len(tree_map)) - self.sock.sendint(len(nnset)) - # send the rprev and next link - for r in nnset: - self.sock.sendint(r) - # send prev link - if rprev != -1 and rprev != rank: - nnset.add(rprev) - self.sock.sendint(rprev) - else: - self.sock.sendint(-1) - # send next link - if rnext != -1 and rnext != rank: - nnset.add(rnext) - self.sock.sendint(rnext) - else: - self.sock.sendint(-1) - while True: - ngood = self.sock.recvint() - goodset = set([]) - for _ in range(ngood): - goodset.add(self.sock.recvint()) - assert goodset.issubset(nnset) - badset = nnset - goodset - conset = [] - for r in badset: - if r in wait_conn: - conset.append(r) - self.sock.sendint(len(conset)) - self.sock.sendint(len(badset) - len(conset)) - for r in conset: - self.sock.sendstr(wait_conn[r].host) - self.sock.sendint(wait_conn[r].port) - self.sock.sendint(r) - nerr = self.sock.recvint() - if nerr != 0: - continue - self.port = self.sock.recvint() - rmset = [] - # all connection was successuly setup - for r in conset: - wait_conn[r].wait_accept -= 1 - if wait_conn[r].wait_accept == 0: - rmset.append(r) - for r in rmset: - wait_conn.pop(r, None) - self.wait_accept = len(badset) - len(conset) - return rmset - - -class RabitTracker(object): - """ - tracker for rabit - """ - - def __init__(self, hostIP, nslave, port=9091, port_end=9999): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - for port in range(port, port_end): - try: - sock.bind((hostIP, port)) - self.port = port - break - except socket.error as e: - if e.errno in [98, 48]: - continue - else: - raise - sock.listen(256) - self.sock = sock - self.hostIP = hostIP - self.thread = None - self.start_time = None - self.end_time = None - self.nslave = nslave - logging.info("start listen on %s:%d", hostIP, self.port) - - def __del__(self): - self.sock.close() - - @staticmethod - def 
get_neighbor(rank, nslave): - rank = rank + 1 - ret = [] - if rank > 1: - ret.append(rank // 2 - 1) - if rank * 2 - 1 < nslave: - ret.append(rank * 2 - 1) - if rank * 2 < nslave: - ret.append(rank * 2) - return ret - - def slave_envs(self): - """ - get enviroment variables for slaves - can be passed in as args or envs - """ - return {"DMLC_TRACKER_URI": self.hostIP, "DMLC_TRACKER_PORT": self.port} - - def get_tree(self, nslave): - tree_map = {} - parent_map = {} - for r in range(nslave): - tree_map[r] = self.get_neighbor(r, nslave) - parent_map[r] = (r + 1) // 2 - 1 - return tree_map, parent_map - - def find_share_ring(self, tree_map, parent_map, r): - """ - get a ring structure that tends to share nodes with the tree - return a list starting from r - """ - nset = set(tree_map[r]) - cset = nset - set([parent_map[r]]) - if len(cset) == 0: - return [r] - rlst = [r] - cnt = 0 - for v in cset: - vlst = self.find_share_ring(tree_map, parent_map, v) - cnt += 1 - if cnt == len(cset): - vlst.reverse() - rlst += vlst - return rlst - - def get_ring(self, tree_map, parent_map): - """ - get a ring connection used to recover local data - """ - assert parent_map[0] == -1 - rlst = self.find_share_ring(tree_map, parent_map, 0) - assert len(rlst) == len(tree_map) - ring_map = {} - nslave = len(tree_map) - for r in range(nslave): - rprev = (r + nslave - 1) % nslave - rnext = (r + 1) % nslave - ring_map[rlst[r]] = (rlst[rprev], rlst[rnext]) - return ring_map - - def get_link_map(self, nslave): - """ - get the link map, this is a bit hacky, call for better algorithm - to place similar nodes together - """ - tree_map, parent_map = self.get_tree(nslave) - ring_map = self.get_ring(tree_map, parent_map) - rmap = {0: 0} - k = 0 - for i in range(nslave - 1): - k = ring_map[k][1] - rmap[k] = i + 1 - - ring_map_ = {} - tree_map_ = {} - parent_map_ = {} - for k, v in ring_map.items(): - ring_map_[rmap[k]] = (rmap[v[0]], rmap[v[1]]) - for k, v in tree_map.items(): - tree_map_[rmap[k]] = 
[rmap[x] for x in v] - for k, v in parent_map.items(): - if k != 0: - parent_map_[rmap[k]] = rmap[v] - else: - parent_map_[rmap[k]] = -1 - return tree_map_, parent_map_, ring_map_ - - def accept_slaves(self, nslave): - # set of nodes that finishs the job - shutdown = {} - # set of nodes that is waiting for connections - wait_conn = {} - # maps job id to rank - job_map = {} - # list of workers that is pending to be assigned rank - pending = [] - # lazy initialize tree_map - tree_map = None - - while len(shutdown) != nslave: - fd, s_addr = self.sock.accept() - s = SlaveEntry(fd, s_addr) - if s.cmd == "print": - msg = s.sock.recvstr() - logging.info(msg.strip()) - continue - if s.cmd == "shutdown": - assert s.rank >= 0 and s.rank not in shutdown - assert s.rank not in wait_conn - shutdown[s.rank] = s - logging.debug("Recieve %s signal from %d", s.cmd, s.rank) - continue - assert s.cmd == "start" or s.cmd == "recover" - # lazily initialize the slaves - if tree_map is None: - assert s.cmd == "start" - if s.world_size > 0: - nslave = s.world_size - tree_map, parent_map, ring_map = self.get_link_map(nslave) - # set of nodes that is pending for getting up - todo_nodes = list(range(nslave)) - else: - assert s.world_size == -1 or s.world_size == nslave - if s.cmd == "recover": - assert s.rank >= 0 - - rank = s.decide_rank(job_map) - # batch assignment of ranks - if rank == -1: - assert len(todo_nodes) != 0 - pending.append(s) - if len(pending) == len(todo_nodes): - pending.sort(key=lambda x: x.host) - for s in pending: - rank = todo_nodes.pop(0) - if s.jobid != "NULL": - job_map[s.jobid] = rank - s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map) - if s.wait_accept > 0: - wait_conn[rank] = s - logging.debug( - "Recieve %s signal from %s; " "assign rank %d", - s.cmd, - s.host, - s.rank, - ) - if len(todo_nodes) == 0: - logging.info("@tracker All of %d nodes getting started", nslave) - self.start_time = time.time() - else: - s.assign_rank(rank, wait_conn, tree_map, 
parent_map, ring_map) - logging.debug("Recieve %s signal from %d", s.cmd, s.rank) - if s.wait_accept > 0: - wait_conn[rank] = s - - logging.info("worker(ip_address=%s) connected!" % get_some_ip(s_addr[0])) - - logging.info("@tracker All nodes finishes job") - self.end_time = time.time() - logging.info( - "@tracker %s secs between node start and job finish", - str(self.end_time - self.start_time), - ) - - def start(self, nslave): - def run(): - self.accept_slaves(nslave) - - self.thread = Thread(target=run, args=()) - self.thread.setDaemon(True) - self.thread.start() - - def join(self): - while self.thread.isAlive(): - self.thread.join(100) - - -class PSTracker(object): - """ - Tracker module for PS - """ - - def __init__(self, hostIP, cmd, port=9091, port_end=9999, envs=None): - """ - Starts the PS scheduler - """ - self.cmd = cmd - if cmd is None: - return - envs = {} if envs is None else envs - self.hostIP = hostIP - sock = socket.socket(get_family(hostIP), socket.SOCK_STREAM) - for port in range(port, port_end): - try: - sock.bind(("", port)) - self.port = port - sock.close() - break - except socket.error: - continue - env = os.environ.copy() - - env["DMLC_ROLE"] = "scheduler" - env["DMLC_PS_ROOT_URI"] = str(self.hostIP) - env["DMLC_PS_ROOT_PORT"] = str(self.port) - for k, v in envs.items(): - env[k] = str(v) - self.thread = Thread( - target=(lambda: subprocess.check_call(self.cmd, env=env, shell=True)), - args=(), - ) - self.thread.setDaemon(True) - self.thread.start() - - def join(self): - if self.cmd is not None: - while self.thread.isAlive(): - self.thread.join(100) - - def slave_envs(self): - if self.cmd is None: - return {} - else: - return {"DMLC_PS_ROOT_URI": self.hostIP, "DMLC_PS_ROOT_PORT": self.port} - - -def get_host_ip(hostIP=None): - if hostIP is None or hostIP == "auto": - hostIP = "ip" - - if hostIP == "dns": - hostIP = socket.getfqdn() - elif hostIP == "ip": - from socket import gaierror - - try: - hostIP = socket.gethostbyname(socket.getfqdn()) 
- except gaierror: - logging.warn( - "gethostbyname(socket.getfqdn()) failed... trying on " "hostname()" - ) - hostIP = socket.gethostbyname(socket.gethostname()) - if hostIP.startswith("127."): - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - # doesn't have to be reachable - s.connect(("10.255.255.255", 1)) - hostIP = s.getsockname()[0] - return hostIP - - -def submit(nworker, nserver, fun_submit, hostIP="auto", pscmd=None): - if nserver == 0: - pscmd = None - - envs = {"DMLC_NUM_WORKER": nworker, "DMLC_NUM_SERVER": nserver} - hostIP = get_host_ip(hostIP) - - if nserver == 0: - rabit = RabitTracker(hostIP=hostIP, nslave=nworker) - envs.update(rabit.slave_envs()) - rabit.start(nworker) - else: - pserver = PSTracker(hostIP=hostIP, cmd=pscmd, envs=envs) - envs.update(pserver.slave_envs()) - fun_submit(nworker, nserver, envs) - - if nserver == 0: - rabit.join() - else: - pserver.join() - - -def start_rabit_tracker(args): - """Standalone function to start rabit tracker. - Parameters - ---------- - args: arguments to start the rabit tracker. - """ - envs = {"DMLC_NUM_WORKER": args.num_workers, "DMLC_NUM_SERVER": args.num_servers} - rabit = RabitTracker(hostIP=get_host_ip(args.host_ip), nslave=args.num_workers) - envs.update(rabit.slave_envs()) - rabit.start(args.num_workers) - sys.stdout.write("DMLC_TRACKER_ENV_START\n") - # simply write configuration to stdout - for k, v in envs.items(): - sys.stdout.write("%s=%s\n" % (k, str(v))) - sys.stdout.write("DMLC_TRACKER_ENV_END\n") - sys.stdout.flush() - rabit.join() - - -def main(): - """Main function if tracker is executed in standalone mode.""" - parser = argparse.ArgumentParser(description="Rabit Tracker start.") - parser.add_argument( - "--num-workers", - required=True, - type=int, - help="Number of worker proccess to be launched.", - ) - parser.add_argument( - "--num-servers", - default=0, - type=int, - help="Number of server process to be launched. 
Only " "used in PS jobs.", - ) - parser.add_argument( - "--host-ip", - default=None, - type=str, - help=( - "Host IP addressed, this is only needed " - + "if the host IP cannot be automatically guessed." - ), - ) - parser.add_argument( - "--log-level", - default="INFO", - type=str, - choices=["INFO", "DEBUG"], - help="Logging level of the logger.", - ) - args = parser.parse_args() - - fmt = "%(asctime)s %(levelname)s %(message)s" - if args.log_level == "INFO": - level = logging.INFO - elif args.log_level == "DEBUG": - level = logging.DEBUG - else: - raise RuntimeError("Unknown logging level %s" % args.log_level) - - logging.basicConfig(format=fmt, level=level) - - if args.num_servers == 0: - start_rabit_tracker(args) - else: - raise RuntimeError("Do not yet support start ps tracker in standalone " "mode.") - - -if __name__ == "__main__": - main() diff --git a/examples/xgboost/smoke-dist/xgboost_smoke_test.py b/examples/xgboost/smoke-dist/xgboost_smoke_test.py deleted file mode 100644 index 297620258f..0000000000 --- a/examples/xgboost/smoke-dist/xgboost_smoke_test.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -import os -import traceback - -import xgboost as xgb -from tracker import RabitTracker - -logger = logging.getLogger(__name__) - - -def extract_xgbooost_cluster_env(): - - logger.info("start to extract system env") - - master_addr = os.environ.get("MASTER_ADDR", "{}") - master_port = int(os.environ.get("MASTER_PORT", "{}")) - rank = int(os.environ.get("RANK", "{}")) - world_size = int(os.environ.get("WORLD_SIZE", "{}")) - - logger.info( - "extract the rabit env from cluster : %s, port: %d, rank: %d, word_size: %d ", - master_addr, - master_port, - rank, - world_size, - ) - - return master_addr, master_port, rank, world_size - - -def setup_rabit_cluster(): - addr, port, rank, world_size = extract_xgbooost_cluster_env() - - rabit_tracker = None - try: - """start to build the network""" - if world_size > 1: - if rank == 0: - logger.info("start the master node") - - rabit = RabitTracker( - hostIP="0.0.0.0", nslave=world_size, port=port, port_end=port + 1 - ) - rabit.start(world_size) - rabit_tracker = rabit - logger.info("########### RabitTracker Setup Finished #########") - - envs = [ - "DMLC_NUM_WORKER=%d" % world_size, - "DMLC_TRACKER_URI=%s" % addr, - "DMLC_TRACKER_PORT=%d" % port, - "DMLC_TASK_ID=%d" % rank, - ] - logger.info("##### Rabit rank setup with below envs #####") - for i, env in enumerate(envs): - logger.info(env) - envs[i] = str.encode(env) - - xgb.rabit.init(envs) - logger.info("##### Rabit rank = %d" % xgb.rabit.get_rank()) - - rank = xgb.rabit.get_rank() - s = None - if rank == 0: - s = {"hello world": 100, 2: 3} - - logger.info('@node[%d] before-broadcast: s="%s"' % (rank, str(s))) - s = xgb.rabit.broadcast(s, 0) - - logger.info('@node[%d] after-broadcast: s="%s"' % (rank, str(s))) - - except Exception as e: - logger.error("something wrong happen: %s", traceback.format_exc()) - raise e - finally: - if world_size > 1: - xgb.rabit.finalize() - if rabit_tracker: - rabit_tracker.join() - - logger.info("the rabit network testing 
finished!") - - -def main(): - - port = os.environ.get("MASTER_PORT", "{}") - logging.info("MASTER_PORT: %s", port) - - addr = os.environ.get("MASTER_ADDR", "{}") - logging.info("MASTER_ADDR: %s", addr) - - world_size = os.environ.get("WORLD_SIZE", "{}") - logging.info("WORLD_SIZE: %s", world_size) - - rank = os.environ.get("RANK", "{}") - logging.info("RANK: %s", rank) - - setup_rabit_cluster() - - -if __name__ == "__main__": - logging.getLogger().setLevel(logging.INFO) - main() diff --git a/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml b/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml deleted file mode 100644 index 80ce3adce1..0000000000 --- a/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - name: xgboost-dist-test -spec: - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - Worker: - replicas: 2 - restartPolicy: Never - template: - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always diff --git a/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml b/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml deleted file mode 100644 index 5364eb07cd..0000000000 --- a/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - name: xgboost-dist-test -spec: - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - apiVersion: v1 - kind: Pod - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest - ports: - - containerPort: 9991 - name: 
xgboostjob-port - imagePullPolicy: Always - Worker: - replicas: 2 - restartPolicy: Never - template: - apiVersion: v1 - kind: Pod - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always diff --git a/examples/xgboost/xgboost-dist/Dockerfile b/examples/xgboost/xgboost-dist/Dockerfile deleted file mode 100644 index 16ea8051df..0000000000 --- a/examples/xgboost/xgboost-dist/Dockerfile +++ /dev/null @@ -1,12 +0,0 @@ -# Install python 3.6. -FROM python:3.6 - -RUN mkdir -p /opt/mlkube - -# Install requirements -COPY requirements.txt /opt/mlkube/ -RUN pip install -r /opt/mlkube/requirements.txt - -COPY *.py /opt/mlkube/ - -ENTRYPOINT ["python", "/opt/mlkube/main.py"] diff --git a/examples/xgboost/xgboost-dist/README.md b/examples/xgboost/xgboost-dist/README.md deleted file mode 100644 index 4234e3ebbc..0000000000 --- a/examples/xgboost/xgboost-dist/README.md +++ /dev/null @@ -1,489 +0,0 @@ -### Distributed XGBoost Job train and prediction - -This folder containers related files for distributed XGBoost training and prediction. In this demo, -[Iris Data Set](https://archive.ics.uci.edu/ml/datasets/iris) is a well known multi-class classification dataset. -Thus, in this demo, distributed XGBoost job is able to do multi-class classification problem. Meanwhile, -User can extend provided data reader to read data from distributed data storage like HDFS, HBase or Hive etc. 
- - -**Configure the job runtime via Yaml file** - -The following files are available to setup distributed XGBoost computation runtime - -To store the model in OSS: - -* xgboostjob_v1_iris_train.yaml -* xgboostjob_v1_iris_predict.yaml - -To store the model in local path: - -* xgboostjob_v1_iris_train_local.yaml -* xgboostjob_v1_iris_predict_local.yaml - -For training jobs in OSS , you could configure xgboostjob_v1_iris_train.yaml and xgboostjob_v1_iris_predict.yaml -Note, we use [OSS](https://www.alibabacloud.com/product/oss) to store the trained model, -thus, you need to specify the OSS parameter in the yaml file. Therefore, remember to fill the OSS parameter in xgboostjob_v1_iris_train.yaml and xgboostjob_v1_iris_predict.yaml file. -The oss parameter includes the account information such as access_id, access_key, access_bucket and endpoint. -For Eg: ---oss_param=endpoint:http://oss-ap-south-1.aliyuncs.com,access_id:XXXXXXXXXXX,access_key:XXXXXXXXXXXXXXXXXXX,access_bucket:XXXXXX -Similarly, xgboostjob_v1_iris_predict.yaml is used to configure XGBoost job batch prediction. - - -**Start the distributed XGBoost train to store the model in OSS** -``` -kubectl create -f xgboostjob_v1_iris_train.yaml -``` - -**Look at the train job status** -``` - kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-train - ``` - Here is a sample output when the job is finished. 
The output log like this -``` -Name: xgboost-dist-iris-test -Namespace: default -Labels: -Annotations: -API Version: kubeflow.org/v1 -Kind: XGBoostJob -Metadata: - Creation Timestamp: 2019-06-27T01:16:09Z - Generation: 9 - Resource Version: 385834 - UID: 2565e99a-9879-11e9-bbab-080027dfbfe2 -Spec: - Run Policy: - Clean Pod Policy: None - Xgb Replica Specs: - Master: - Replicas: 1 - Restart Policy: Never - Template: - Metadata: - Creation Timestamp: - Spec: - Containers: - Args: - --job_type=Train - --xgboost_parameter=objective:multi:softprob,num_class:3 - --n_estimators=10 - --learning_rate=0.1 - --model_path=autoAI/xgb-opt/2 - --model_storage_type=oss - --oss_param=unknown - Image: docker.io/kubeflow/xgboost-dist-iris:latest - Image Pull Policy: Always - Name: xgboostjob - Ports: - Container Port: 9991 - Name: xgboostjob-port - Resources: - Worker: - Replicas: 2 - Restart Policy: ExitCode - Template: - Metadata: - Creation Timestamp: - Spec: - Containers: - Args: - --job_type=Train - --xgboost_parameter="objective:multi:softprob,num_class:3" - --n_estimators=10 - --learning_rate=0.1 - --model_path="/tmp/xgboost_model" - --model_storage_type=oss - Image: docker.io/kubeflow/xgboost-dist-iris:latest - Image Pull Policy: Always - Name: xgboostjob - Ports: - Container Port: 9991 - Name: xgboostjob-port - Resources: -Status: - Completion Time: 2019-06-27T01:17:04Z - Conditions: - Last Transition Time: 2019-06-27T01:16:09Z - Last Update Time: 2019-06-27T01:16:09Z - Message: xgboostJob xgboost-dist-iris-test is created. - Reason: XGBoostJobCreated - Status: True - Type: Created - Last Transition Time: 2019-06-27T01:16:09Z - Last Update Time: 2019-06-27T01:16:09Z - Message: XGBoostJob xgboost-dist-iris-test is running. - Reason: XGBoostJobRunning - Status: False - Type: Running - Last Transition Time: 2019-06-27T01:17:04Z - Last Update Time: 2019-06-27T01:17:04Z - Message: XGBoostJob xgboost-dist-iris-test is successfully completed. 
- Reason: XGBoostJobSucceeded - Status: True - Type: Succeeded - Replica Statuses: - Master: - Succeeded: 1 - Worker: - Succeeded: 2 -Events: - Type Reason Age From Message - ---- ------ ---- ---- ------- - Normal SuccessfulCreatePod 102s xgboostjob-operator Created pod: xgboost-dist-iris-test-master-0 - Normal SuccessfulCreateService 102s xgboostjob-operator Created service: xgboost-dist-iris-test-master-0 - Normal SuccessfulCreatePod 102s xgboostjob-operator Created pod: xgboost-dist-iris-test-worker-1 - Normal SuccessfulCreateService 102s xgboostjob-operator Created service: xgboost-dist-iris-test-worker-0 - Normal SuccessfulCreateService 102s xgboostjob-operator Created service: xgboost-dist-iris-test-worker-1 - Normal SuccessfulCreatePod 64s xgboostjob-operator Created pod: xgboost-dist-iris-test-worker-0 - Normal ExitedWithCode 47s (x3 over 49s) xgboostjob-operator Pod: default.xgboost-dist-iris-test-worker-1 exited with code 0 - Normal ExitedWithCode 47s xgboostjob-operator Pod: default.xgboost-dist-iris-test-master-0 exited with code 0 - Normal XGBoostJobSucceeded 47s xgboostjob-operator XGBoostJob xgboost-dist-iris-test is successfully completed. - ``` - -**Start the distributed XGBoost job predict** -```shell -kubectl create -f xgboostjob_v1_iris_predict.yaml -``` - -**Look at the batch predict job status** -``` - kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-predict - ``` - Here is a sample output when the job is finished. 
The output log like this -``` -Name: xgboost-dist-iris-test-predict -Namespace: default -Labels: -Annotations: -API Version: kubeflow.org/v1 -Kind: XGBoostJob -Metadata: - Creation Timestamp: 2019-06-27T06:06:53Z - Generation: 8 - Resource Version: 394523 - UID: c2a04cbc-98a1-11e9-bbab-080027dfbfe2 -Spec: - Run Policy: - Clean Pod Policy: None - Xgb Replica Specs: - Master: - Replicas: 1 - Restart Policy: Never - Template: - Metadata: - Creation Timestamp: - Spec: - Containers: - Args: - --job_type=Predict - --model_path=autoAI/xgb-opt/3 - --model_storage_type=oss - --oss_param=unkown - Image: docker.io/kubeflow/xgboost-dist-iris:latest - Image Pull Policy: Always - Name: xgboostjob - Ports: - Container Port: 9991 - Name: xgboostjob-port - Resources: - Worker: - Replicas: 2 - Restart Policy: ExitCode - Template: - Metadata: - Creation Timestamp: - Spec: - Containers: - Args: - --job_type=Predict - --model_path=autoAI/xgb-opt/3 - --model_storage_type=oss - --oss_param=unkown - Image: docker.io/kubeflow/xgboost-dist-iris:latest - Image Pull Policy: Always - Name: xgboostjob - Ports: - Container Port: 9991 - Name: xgboostjob-port - Resources: -Status: - Completion Time: 2019-06-27T06:07:02Z - Conditions: - Last Transition Time: 2019-06-27T06:06:53Z - Last Update Time: 2019-06-27T06:06:53Z - Message: xgboostJob xgboost-dist-iris-test-predict is created. - Reason: XGBoostJobCreated - Status: True - Type: Created - Last Transition Time: 2019-06-27T06:06:53Z - Last Update Time: 2019-06-27T06:06:53Z - Message: XGBoostJob xgboost-dist-iris-test-predict is running. - Reason: XGBoostJobRunning - Status: False - Type: Running - Last Transition Time: 2019-06-27T06:07:02Z - Last Update Time: 2019-06-27T06:07:02Z - Message: XGBoostJob xgboost-dist-iris-test-predict is successfully completed. 
- Reason: XGBoostJobSucceeded - Status: True - Type: Succeeded - Replica Statuses: - Master: - Succeeded: 1 - Worker: - Succeeded: 2 -Events: - Type Reason Age From Message - ---- ------ ---- ---- ------- - Normal SuccessfulCreatePod 47s xgboostjob-operator Created pod: xgboost-dist-iris-test-predict-worker-0 - Normal SuccessfulCreatePod 47s xgboostjob-operator Created pod: xgboost-dist-iris-test-predict-worker-1 - Normal SuccessfulCreateService 47s xgboostjob-operator Created service: xgboost-dist-iris-test-predict-worker-0 - Normal SuccessfulCreateService 47s xgboostjob-operator Created service: xgboost-dist-iris-test-predict-worker-1 - Normal SuccessfulCreatePod 47s xgboostjob-operator Created pod: xgboost-dist-iris-test-predict-master-0 - Normal SuccessfulCreateService 47s xgboostjob-operator Created service: xgboost-dist-iris-test-predict-master-0 - Normal ExitedWithCode 38s (x3 over 40s) xgboostjob-operator Pod: default.xgboost-dist-iris-test-predict-worker-0 exited with code 0 - Normal ExitedWithCode 38s xgboostjob-operator Pod: default.xgboost-dist-iris-test-predict-master-0 exited with code 0 - Normal XGBoostJobSucceeded 38s xgboostjob-operator XGBoostJob xgboost-dist-iris-test-predict is successfully completed. -``` - -**Start the distributed XGBoost train to store the model locally** - -Before proceeding with training we will create a PVC to store the model trained. -Creating pvc : -create a yaml file with the below content -pvc.yaml -``` -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: xgboostlocal -spec: - storageClassName: glusterfs - accessModes: - - ReadWriteMany - resources: - requests: - storage: 10Gi -``` -``` -kubectl create -f pvc.yaml -``` -Note: - -* Please use the storage class which supports ReadWriteMany. 
The example yaml above uses glusterfs - -* Mention model_storage_type=local and model_path accordingly( In the example /tmp/xgboost_model/2 is used ) in xgboostjob_v1_iris_train_local.yaml and xgboostjob_v1_iris_predict_local.yaml" - -Now start the distributed XGBoost train. -``` -kubectl create -f xgboostjob_v1_iris_train_local.yaml -``` - -**Look at the train job status** -``` - kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-train-local - ``` - Here is a sample output when the job is finished. The output log like this -``` - -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - creationTimestamp: "2019-09-17T05:36:01Z" - generation: 7 - name: xgboost-dist-iris-test-train_local - namespace: default - resourceVersion: "8919366" - uid: 08f85fad-d90d-11e9-aca1-fa163ea13108 -spec: - RunPolicy: - cleanPodPolicy: None - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - metadata: - creationTimestamp: null - spec: - containers: - - args: - - --job_type=Train - - --xgboost_parameter=objective:multi:softprob,num_class:3 - - --n_estimators=10 - - --learning_rate=0.1 - - --model_path=/tmp/xgboost_model/2 - - --model_storage_type=local - image: docker.io/kubeflow/xgboost-dist-iris:latest - imagePullPolicy: Always - name: xgboostjob - ports: - - containerPort: 9991 - name: xgboostjob-port - resources: {} - volumeMounts: - - mountPath: /tmp/xgboost_model - name: task-pv-storage - volumes: - - name: task-pv-storage - persistentVolumeClaim: - claimName: xgboostlocal - Worker: - replicas: 2 - restartPolicy: ExitCode - template: - metadata: - creationTimestamp: null - spec: - containers: - - args: - - --job_type=Train - - --xgboost_parameter="objective:multi:softprob,num_class:3" - - --n_estimators=10 - - --learning_rate=0.1 - - --model_path=/tmp/xgboost_model/2 - - --model_storage_type=local - image: bcmt-registry:5000/kubeflow/xgboost-dist-iris-test:1.0 - imagePullPolicy: Always - name: xgboostjob - ports: - - containerPort: 9991 - name: 
xgboostjob-port - resources: {} - volumeMounts: - - mountPath: /tmp/xgboost_model - name: task-pv-storage - volumes: - - name: task-pv-storage - persistentVolumeClaim: - claimName: xgboostlocal -status: - completionTime: "2019-09-17T05:37:02Z" - conditions: - - lastTransitionTime: "2019-09-17T05:36:02Z" - lastUpdateTime: "2019-09-17T05:36:02Z" - message: xgboostJob xgboost-dist-iris-test-train_local is created. - reason: XGBoostJobCreated - status: "True" - type: Created - - lastTransitionTime: "2019-09-17T05:36:02Z" - lastUpdateTime: "2019-09-17T05:36:02Z" - message: XGBoostJob xgboost-dist-iris-test-train_local is running. - reason: XGBoostJobRunning - status: "False" - type: Running - - lastTransitionTime: "2019-09-17T05:37:02Z" - lastUpdateTime: "2019-09-17T05:37:02Z" - message: XGBoostJob xgboost-dist-iris-test-train_local is successfully completed. - reason: XGBoostJobSucceeded - status: "True" - type: Succeeded - replicaStatuses: - Master: - succeeded: 1 - Worker: - succeeded: 2 - ``` -**Start the distributed XGBoost job predict** -``` -kubectl create -f xgboostjob_v1_iris_predict_local.yaml -``` - -**Look at the batch predict job status** -``` - kubectl get -o yaml XGBoostJob/xgboost-dist-iris-test-predict-local - ``` - Here is a sample output when the job is finished. 
The output log like this -``` -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - creationTimestamp: "2019-09-17T06:33:38Z" - generation: 6 - name: xgboost-dist-iris-test-predict_local - namespace: default - resourceVersion: "8976054" - uid: 151655b0-d915-11e9-aca1-fa163ea13108 -spec: - RunPolicy: - cleanPodPolicy: None - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - metadata: - creationTimestamp: null - spec: - containers: - - args: - - --job_type=Predict - - --model_path=/tmp/xgboost_model/2 - - --model_storage_type=local - image: docker.io/kubeflow/xgboost-dist-iris:latest - imagePullPolicy: Always - name: xgboostjob - ports: - - containerPort: 9991 - name: xgboostjob-port - resources: {} - volumeMounts: - - mountPath: /tmp/xgboost_model - name: task-pv-storage - volumes: - - name: task-pv-storage - persistentVolumeClaim: - claimName: xgboostlocal - Worker: - replicas: 2 - restartPolicy: ExitCode - template: - metadata: - creationTimestamp: null - spec: - containers: - - args: - - --job_type=Predict - - --model_path=/tmp/xgboost_model/2 - - --model_storage_type=local - image: docker.io/kubeflow/xgboost-dist-iris:latest - imagePullPolicy: Always - name: xgboostjob - ports: - - containerPort: 9991 - name: xgboostjob-port - resources: {} - volumeMounts: - - mountPath: /tmp/xgboost_model - name: task-pv-storage - volumes: - - name: task-pv-storage - persistentVolumeClaim: - claimName: xgboostlocal -status: - completionTime: "2019-09-17T06:33:51Z" - conditions: - - lastTransitionTime: "2019-09-17T06:33:38Z" - lastUpdateTime: "2019-09-17T06:33:38Z" - message: xgboostJob xgboost-dist-iris-test-predict_local is created. - reason: XGBoostJobCreated - status: "True" - type: Created - - lastTransitionTime: "2019-09-17T06:33:38Z" - lastUpdateTime: "2019-09-17T06:33:38Z" - message: XGBoostJob xgboost-dist-iris-test-predict_local is running. 
- reason: XGBoostJobRunning - status: "False" - type: Running - - lastTransitionTime: "2019-09-17T06:33:51Z" - lastUpdateTime: "2019-09-17T06:33:51Z" - message: XGBoostJob xgboost-dist-iris-test-predict_local is successfully completed. - reason: XGBoostJobSucceeded - status: "True" - type: Succeeded - replicaStatuses: - Master: - succeeded: 1 - Worker: - succeeded: 1 -``` diff --git a/examples/xgboost/xgboost-dist/local_test.py b/examples/xgboost/xgboost-dist/local_test.py deleted file mode 100644 index 96b7fad0e5..0000000000 --- a/examples/xgboost/xgboost-dist/local_test.py +++ /dev/null @@ -1,102 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -this file contains tests for xgboost local train and predict in single machine. 
-Note: this is not for distributed train and predict test -""" -import logging - -import numpy as np -import xgboost as xgb -from sklearn.metrics import precision_score -from utils import dump_model, read_model, read_predict_data, read_train_data - -logger = logging.getLogger(__name__) - - -def test_train_model(): - """ - test xgboost train in a single machine - :return: trained model - """ - rank = 1 - world_size = 10 - place = "/tmp/data" - dmatrix = read_train_data(rank, world_size, place) - - param_xgboost_default = { - "max_depth": 2, - "eta": 1, - "silent": 1, - "objective": "multi:softprob", - "num_class": 3, - } - - booster = xgb.train(param_xgboost_default, dtrain=dmatrix) - - assert booster is not None - - return booster - - -def test_model_predict(booster): - """ - test xgboost train in the single node - :return: true if pass the test - """ - rank = 1 - world_size = 10 - place = "/tmp/data" - dmatrix, y_test = read_predict_data(rank, world_size, place) - - preds = booster.predict(dmatrix) - best_preds = np.asarray([np.argmax(line) for line in preds]) - score = precision_score(y_test, best_preds, average="macro") - - assert score > 0.99 - - logging.info("Predict accuracy: %f", score) - - return True - - -def test_upload_model(model, model_path, args): - - return dump_model(model, type="local", model_path=model_path, args=args) - - -def test_download_model(model_path, args): - - return read_model(type="local", model_path=model_path, args=args) - - -def run_test(): - args = {} - model_path = "/tmp/xgboost" - - logging.info("Start the local test") - - booster = test_train_model() - test_upload_model(booster, model_path, args) - booster_new = test_download_model(model_path, args) - test_model_predict(booster_new) - - logging.info("Finish the local test") - - -if __name__ == "__main__": - - logging.basicConfig(format="%(message)s") - logging.getLogger().setLevel(logging.INFO) - - run_test() diff --git a/examples/xgboost/xgboost-dist/main.py 
b/examples/xgboost/xgboost-dist/main.py deleted file mode 100644 index f11227e38d..0000000000 --- a/examples/xgboost/xgboost-dist/main.py +++ /dev/null @@ -1,81 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging - -from predict import predict -from train import train -from utils import dump_model - - -def main(args): - - model_storage_type = args.model_storage_type - if model_storage_type == "local" or model_storage_type == "oss": - print("The storage type is " + model_storage_type) - else: - raise Exception("Only supports storage types like local and OSS") - - if args.job_type == "Predict": - logging.info("starting the predict job") - predict(args) - - elif args.job_type == "Train": - logging.info("starting the train job") - model = train(args) - - if model is not None: - logging.info("finish the model training, and start to dump model ") - model_path = args.model_path - dump_model(model, model_storage_type, model_path, args) - - elif args.job_type == "All": - logging.info("starting the train and predict job") - - logging.info("Finish distributed XGBoost job") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument("--job_type", help="Train, Predict, All", required=True) - parser.add_argument( - "--xgboost_parameter", - help="XGBoost model parameter like: objective, number_class", - ) - parser.add_argument( - "--n_estimators", help="Number of trees in the model", type=int, default=1000 - ) - 
parser.add_argument( - "--learning_rate", help="Learning rate for the model", default=0.1 - ) - parser.add_argument( - "--early_stopping_rounds", - help="XGBoost argument for stopping early", - default=50, - ) - parser.add_argument( - "--model_path", help="place to store model", default="/tmp/xgboost_model" - ) - parser.add_argument( - "--model_storage_type", help="place to store the model", default="oss" - ) - parser.add_argument( - "--oss_param", - help="oss parameter if you choose the model storage as OSS type", - ) - - logging.basicConfig(format="%(message)s") - logging.getLogger().setLevel(logging.INFO) - main_args = parser.parse_args() - main(main_args) diff --git a/examples/xgboost/xgboost-dist/predict.py b/examples/xgboost/xgboost-dist/predict.py deleted file mode 100644 index ea5bebb519..0000000000 --- a/examples/xgboost/xgboost-dist/predict.py +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging - -import numpy as np -from sklearn.metrics import precision_score -from utils import extract_xgbooost_cluster_env, read_model, read_predict_data - - -def predict(args): - """ - This is the demonstration for the batch prediction - :param args: parameter for model related config - """ - - addr, port, rank, world_size = extract_xgbooost_cluster_env() - - dmatrix, y_test = read_predict_data(rank, world_size, None) - - model_path = args.model_path - storage_type = args.model_storage_type - booster = read_model(storage_type, model_path, args) - - preds = booster.predict(dmatrix) - - best_preds = np.asarray([np.argmax(line) for line in preds]) - score = precision_score(y_test, best_preds, average="macro") - - logging.info("Predict accuracy: %f", score) diff --git a/examples/xgboost/xgboost-dist/requirements.txt b/examples/xgboost/xgboost-dist/requirements.txt deleted file mode 100644 index d2cdbf6095..0000000000 --- a/examples/xgboost/xgboost-dist/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -numpy>=1.16.3 -Cython>=0.29.4 -requests>=2.21.0 -urllib3>=1.21.1 -scipy>=1.1.0 -joblib>=0.13.2 -scikit-learn>=0.20 -oss2>=2.7.0 -pandas>=0.24.2 -xgboost==1.5.2 diff --git a/examples/xgboost/xgboost-dist/tracker.py b/examples/xgboost/xgboost-dist/tracker.py deleted file mode 100644 index 9c9367a3b7..0000000000 --- a/examples/xgboost/xgboost-dist/tracker.py +++ /dev/null @@ -1,527 +0,0 @@ -""" -Tracker script for DMLC -Implements the tracker control protocol - - start dmlc jobs - - start ps scheduler and rabit tracker - - help nodes to establish links with each other -Tianqi Chen --------------------------- -This was taken from -https://github.com/dmlc/dmlc-core/blob/master/tracker/dmlc_tracker/tracker.py -See LICENSE here -https://github.com/dmlc/dmlc-core/blob/master/LICENSE -No code modified or added except for this explanatory comment. 
-""" - -# pylint: disable=invalid-name, missing-docstring, too-many-arguments -# pylint: disable=too-many-locals -# pylint: disable=too-many-branches, too-many-statements -from __future__ import absolute_import - -import argparse -import logging -import os -import socket -import struct -import subprocess -import sys -import time -from threading import Thread - - -class ExSocket(object): - """ - Extension of socket to handle recv and send of special data - """ - - def __init__(self, sock): - self.sock = sock - - def recvall(self, nbytes): - res = [] - nread = 0 - while nread < nbytes: - chunk = self.sock.recv(min(nbytes - nread, 1024)) - nread += len(chunk) - res.append(chunk) - return b"".join(res) - - def recvint(self): - return struct.unpack("@i", self.recvall(4))[0] - - def sendint(self, n): - self.sock.sendall(struct.pack("@i", n)) - - def sendstr(self, s): - self.sendint(len(s)) - self.sock.sendall(s.encode()) - - def recvstr(self): - slen = self.recvint() - return self.recvall(slen).decode() - - -# magic number used to verify existence of data -kMagic = 0xFF99 - - -def get_some_ip(host): - return socket.getaddrinfo(host, None)[0][4][0] - - -def get_family(addr): - return socket.getaddrinfo(addr, None)[0][0] - - -class SlaveEntry(object): - def __init__(self, sock, s_addr): - slave = ExSocket(sock) - self.sock = slave - self.host = get_some_ip(s_addr[0]) - magic = slave.recvint() - assert magic == kMagic, "invalid magic number=%d from %s" % (magic, self.host) - slave.sendint(kMagic) - self.rank = slave.recvint() - self.world_size = slave.recvint() - self.jobid = slave.recvstr() - self.cmd = slave.recvstr() - self.wait_accept = 0 - self.port = None - - def decide_rank(self, job_map): - if self.rank >= 0: - return self.rank - if self.jobid != "NULL" and self.jobid in job_map: - return job_map[self.jobid] - return -1 - - def assign_rank(self, rank, wait_conn, tree_map, parent_map, ring_map): - self.rank = rank - nnset = set(tree_map[rank]) - rprev, rnext = 
ring_map[rank] - self.sock.sendint(rank) - # send parent rank - self.sock.sendint(parent_map[rank]) - # send world size - self.sock.sendint(len(tree_map)) - self.sock.sendint(len(nnset)) - # send the rprev and next link - for r in nnset: - self.sock.sendint(r) - # send prev link - if rprev != -1 and rprev != rank: - nnset.add(rprev) - self.sock.sendint(rprev) - else: - self.sock.sendint(-1) - # send next link - if rnext != -1 and rnext != rank: - nnset.add(rnext) - self.sock.sendint(rnext) - else: - self.sock.sendint(-1) - while True: - ngood = self.sock.recvint() - goodset = set([]) - for _ in range(ngood): - goodset.add(self.sock.recvint()) - assert goodset.issubset(nnset) - badset = nnset - goodset - conset = [] - for r in badset: - if r in wait_conn: - conset.append(r) - self.sock.sendint(len(conset)) - self.sock.sendint(len(badset) - len(conset)) - for r in conset: - self.sock.sendstr(wait_conn[r].host) - self.sock.sendint(wait_conn[r].port) - self.sock.sendint(r) - nerr = self.sock.recvint() - if nerr != 0: - continue - self.port = self.sock.recvint() - rmset = [] - # all connection was successuly setup - for r in conset: - wait_conn[r].wait_accept -= 1 - if wait_conn[r].wait_accept == 0: - rmset.append(r) - for r in rmset: - wait_conn.pop(r, None) - self.wait_accept = len(badset) - len(conset) - return rmset - - -class RabitTracker(object): - """ - tracker for rabit - """ - - def __init__(self, hostIP, nslave, port=9091, port_end=9999): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - for port in range(port, port_end): - try: - sock.bind((hostIP, port)) - self.port = port - break - except socket.error as e: - if e.errno in [98, 48]: - continue - else: - raise - sock.listen(256) - self.sock = sock - self.hostIP = hostIP - self.thread = None - self.start_time = None - self.end_time = None - self.nslave = nslave - logging.info("start listen on %s:%d", hostIP, self.port) - - def __del__(self): - self.sock.close() - - @staticmethod - def 
get_neighbor(rank, nslave): - rank = rank + 1 - ret = [] - if rank > 1: - ret.append(rank // 2 - 1) - if rank * 2 - 1 < nslave: - ret.append(rank * 2 - 1) - if rank * 2 < nslave: - ret.append(rank * 2) - return ret - - def slave_envs(self): - """ - get enviroment variables for slaves - can be passed in as args or envs - """ - return {"DMLC_TRACKER_URI": self.hostIP, "DMLC_TRACKER_PORT": self.port} - - def get_tree(self, nslave): - tree_map = {} - parent_map = {} - for r in range(nslave): - tree_map[r] = self.get_neighbor(r, nslave) - parent_map[r] = (r + 1) // 2 - 1 - return tree_map, parent_map - - def find_share_ring(self, tree_map, parent_map, r): - """ - get a ring structure that tends to share nodes with the tree - return a list starting from r - """ - nset = set(tree_map[r]) - cset = nset - set([parent_map[r]]) - if len(cset) == 0: - return [r] - rlst = [r] - cnt = 0 - for v in cset: - vlst = self.find_share_ring(tree_map, parent_map, v) - cnt += 1 - if cnt == len(cset): - vlst.reverse() - rlst += vlst - return rlst - - def get_ring(self, tree_map, parent_map): - """ - get a ring connection used to recover local data - """ - assert parent_map[0] == -1 - rlst = self.find_share_ring(tree_map, parent_map, 0) - assert len(rlst) == len(tree_map) - ring_map = {} - nslave = len(tree_map) - for r in range(nslave): - rprev = (r + nslave - 1) % nslave - rnext = (r + 1) % nslave - ring_map[rlst[r]] = (rlst[rprev], rlst[rnext]) - return ring_map - - def get_link_map(self, nslave): - """ - get the link map, this is a bit hacky, call for better algorithm - to place similar nodes together - """ - tree_map, parent_map = self.get_tree(nslave) - ring_map = self.get_ring(tree_map, parent_map) - rmap = {0: 0} - k = 0 - for i in range(nslave - 1): - k = ring_map[k][1] - rmap[k] = i + 1 - - ring_map_ = {} - tree_map_ = {} - parent_map_ = {} - for k, v in ring_map.items(): - ring_map_[rmap[k]] = (rmap[v[0]], rmap[v[1]]) - for k, v in tree_map.items(): - tree_map_[rmap[k]] = 
[rmap[x] for x in v] - for k, v in parent_map.items(): - if k != 0: - parent_map_[rmap[k]] = rmap[v] - else: - parent_map_[rmap[k]] = -1 - return tree_map_, parent_map_, ring_map_ - - def accept_slaves(self, nslave): - # set of nodes that finishs the job - shutdown = {} - # set of nodes that is waiting for connections - wait_conn = {} - # maps job id to rank - job_map = {} - # list of workers that is pending to be assigned rank - pending = [] - # lazy initialize tree_map - tree_map = None - - while len(shutdown) != nslave: - fd, s_addr = self.sock.accept() - s = SlaveEntry(fd, s_addr) - if s.cmd == "print": - msg = s.sock.recvstr() - logging.info(msg.strip()) - continue - if s.cmd == "shutdown": - assert s.rank >= 0 and s.rank not in shutdown - assert s.rank not in wait_conn - shutdown[s.rank] = s - logging.debug("Recieve %s signal from %d", s.cmd, s.rank) - continue - assert s.cmd == "start" or s.cmd == "recover" - # lazily initialize the slaves - if tree_map is None: - assert s.cmd == "start" - if s.world_size > 0: - nslave = s.world_size - tree_map, parent_map, ring_map = self.get_link_map(nslave) - # set of nodes that is pending for getting up - todo_nodes = list(range(nslave)) - else: - assert s.world_size == -1 or s.world_size == nslave - if s.cmd == "recover": - assert s.rank >= 0 - - rank = s.decide_rank(job_map) - # batch assignment of ranks - if rank == -1: - assert len(todo_nodes) != 0 - pending.append(s) - if len(pending) == len(todo_nodes): - pending.sort(key=lambda x: x.host) - for s in pending: - rank = todo_nodes.pop(0) - if s.jobid != "NULL": - job_map[s.jobid] = rank - s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map) - if s.wait_accept > 0: - wait_conn[rank] = s - logging.debug( - "Recieve %s signal from %s; " "assign rank %d", - s.cmd, - s.host, - s.rank, - ) - if len(todo_nodes) == 0: - logging.info("@tracker All of %d nodes getting started", nslave) - self.start_time = time.time() - else: - s.assign_rank(rank, wait_conn, tree_map, 
parent_map, ring_map) - logging.debug("Recieve %s signal from %d", s.cmd, s.rank) - if s.wait_accept > 0: - wait_conn[rank] = s - - logging.info("worker(ip_address=%s) connected!" % get_some_ip(s_addr[0])) - - logging.info("@tracker All nodes finishes job") - self.end_time = time.time() - logging.info( - "@tracker %s secs between node start and job finish", - str(self.end_time - self.start_time), - ) - - def start(self, nslave): - def run(): - self.accept_slaves(nslave) - - self.thread = Thread(target=run, args=()) - self.thread.setDaemon(True) - self.thread.start() - - def join(self): - while self.thread.isAlive(): - self.thread.join(100) - - -class PSTracker(object): - """ - Tracker module for PS - """ - - def __init__(self, hostIP, cmd, port=9091, port_end=9999, envs=None): - """ - Starts the PS scheduler - """ - self.cmd = cmd - if cmd is None: - return - envs = {} if envs is None else envs - self.hostIP = hostIP - sock = socket.socket(get_family(hostIP), socket.SOCK_STREAM) - for port in range(port, port_end): - try: - sock.bind(("", port)) - self.port = port - sock.close() - break - except socket.error: - continue - env = os.environ.copy() - - env["DMLC_ROLE"] = "scheduler" - env["DMLC_PS_ROOT_URI"] = str(self.hostIP) - env["DMLC_PS_ROOT_PORT"] = str(self.port) - for k, v in envs.items(): - env[k] = str(v) - self.thread = Thread( - target=(lambda: subprocess.check_call(self.cmd, env=env, shell=True)), - args=(), - ) - self.thread.setDaemon(True) - self.thread.start() - - def join(self): - if self.cmd is not None: - while self.thread.isAlive(): - self.thread.join(100) - - def slave_envs(self): - if self.cmd is None: - return {} - else: - return {"DMLC_PS_ROOT_URI": self.hostIP, "DMLC_PS_ROOT_PORT": self.port} - - -def get_host_ip(hostIP=None): - if hostIP is None or hostIP == "auto": - hostIP = "ip" - - if hostIP == "dns": - hostIP = socket.getfqdn() - elif hostIP == "ip": - from socket import gaierror - - try: - hostIP = socket.gethostbyname(socket.getfqdn()) 
- except gaierror: - logging.warn( - "gethostbyname(socket.getfqdn()) failed... trying on " "hostname()" - ) - hostIP = socket.gethostbyname(socket.gethostname()) - if hostIP.startswith("127."): - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - # doesn't have to be reachable - s.connect(("10.255.255.255", 1)) - hostIP = s.getsockname()[0] - return hostIP - - -def submit(nworker, nserver, fun_submit, hostIP="auto", pscmd=None): - if nserver == 0: - pscmd = None - - envs = {"DMLC_NUM_WORKER": nworker, "DMLC_NUM_SERVER": nserver} - hostIP = get_host_ip(hostIP) - - if nserver == 0: - rabit = RabitTracker(hostIP=hostIP, nslave=nworker) - envs.update(rabit.slave_envs()) - rabit.start(nworker) - else: - pserver = PSTracker(hostIP=hostIP, cmd=pscmd, envs=envs) - envs.update(pserver.slave_envs()) - fun_submit(nworker, nserver, envs) - - if nserver == 0: - rabit.join() - else: - pserver.join() - - -def start_rabit_tracker(args): - """Standalone function to start rabit tracker. - Parameters - ---------- - args: arguments to start the rabit tracker. - """ - envs = {"DMLC_NUM_WORKER": args.num_workers, "DMLC_NUM_SERVER": args.num_servers} - rabit = RabitTracker(hostIP=get_host_ip(args.host_ip), nslave=args.num_workers) - envs.update(rabit.slave_envs()) - rabit.start(args.num_workers) - sys.stdout.write("DMLC_TRACKER_ENV_START\n") - # simply write configuration to stdout - for k, v in envs.items(): - sys.stdout.write("%s=%s\n" % (k, str(v))) - sys.stdout.write("DMLC_TRACKER_ENV_END\n") - sys.stdout.flush() - rabit.join() - - -def main(): - """Main function if tracker is executed in standalone mode.""" - parser = argparse.ArgumentParser(description="Rabit Tracker start.") - parser.add_argument( - "--num-workers", - required=True, - type=int, - help="Number of worker proccess to be launched.", - ) - parser.add_argument( - "--num-servers", - default=0, - type=int, - help="Number of server process to be launched. 
Only " "used in PS jobs.", - ) - parser.add_argument( - "--host-ip", - default=None, - type=str, - help=( - "Host IP addressed, this is only needed " - + "if the host IP cannot be automatically guessed." - ), - ) - parser.add_argument( - "--log-level", - default="INFO", - type=str, - choices=["INFO", "DEBUG"], - help="Logging level of the logger.", - ) - args = parser.parse_args() - - fmt = "%(asctime)s %(levelname)s %(message)s" - if args.log_level == "INFO": - level = logging.INFO - elif args.log_level == "DEBUG": - level = logging.DEBUG - else: - raise RuntimeError("Unknown logging level %s" % args.log_level) - - logging.basicConfig(format=fmt, level=level) - - if args.num_servers == 0: - start_rabit_tracker(args) - else: - raise RuntimeError("Do not yet support start ps tracker in standalone " "mode.") - - -if __name__ == "__main__": - main() diff --git a/examples/xgboost/xgboost-dist/train.py b/examples/xgboost/xgboost-dist/train.py deleted file mode 100644 index 469be2a7d3..0000000000 --- a/examples/xgboost/xgboost-dist/train.py +++ /dev/null @@ -1,97 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import logging -import traceback - -import xgboost as xgb -from tracker import RabitTracker -from utils import extract_xgbooost_cluster_env, read_train_data - -logger = logging.getLogger(__name__) - - -def train(args): - """ - :param args: configuration for train job - :return: XGBoost model - """ - addr, port, rank, world_size = extract_xgbooost_cluster_env() - rabit_tracker = None - - try: - """start to build the network""" - if world_size > 1: - if rank == 0: - logger.info("start the master node") - - rabit = RabitTracker( - hostIP="0.0.0.0", nslave=world_size, port=port, port_end=port + 1 - ) - rabit.start(world_size) - rabit_tracker = rabit - logger.info("###### RabitTracker Setup Finished ######") - - envs = [ - "DMLC_NUM_WORKER=%d" % world_size, - "DMLC_TRACKER_URI=%s" % addr, - "DMLC_TRACKER_PORT=%d" % port, - "DMLC_TASK_ID=%d" % rank, - ] - logger.info("##### Rabit rank setup with below envs #####") - for i, env in enumerate(envs): - logger.info(env) - envs[i] = str.encode(env) - - xgb.rabit.init(envs) - logger.info("##### Rabit rank = %d" % xgb.rabit.get_rank()) - rank = xgb.rabit.get_rank() - - else: - world_size = 1 - logging.info("Start the train in a single node") - - df = read_train_data(rank=rank, num_workers=world_size, path=None) - kwargs = {} - kwargs["dtrain"] = df - kwargs["num_boost_round"] = int(args.n_estimators) - param_xgboost_default = { - "max_depth": 2, - "eta": 1, - "silent": 1, - "objective": "multi:softprob", - "num_class": 3, - } - kwargs["params"] = param_xgboost_default - - logging.info("starting to train xgboost at node with rank %d", rank) - bst = xgb.train(**kwargs) - - if rank == 0: - model = bst - else: - model = None - - logging.info("finish xgboost training at node with rank %d", rank) - - except Exception as e: - logger.error("something wrong happen: %s", traceback.format_exc()) - raise e - finally: - logger.info("xgboost training job finished!") - if world_size > 1: - xgb.rabit.finalize() - if rabit_tracker: - 
rabit_tracker.join() - - return model diff --git a/examples/xgboost/xgboost-dist/utils.py b/examples/xgboost/xgboost-dist/utils.py deleted file mode 100644 index 99e384733d..0000000000 --- a/examples/xgboost/xgboost-dist/utils.py +++ /dev/null @@ -1,294 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import logging -import os -import tempfile - -import joblib -import oss2 -import pandas as pd -import xgboost as xgb -from sklearn import datasets - -logger = logging.getLogger(__name__) - - -def extract_xgbooost_cluster_env(): - """ - Extract the cluster env from pod - :return: the related cluster env to build rabit - """ - - logger.info("starting to extract system env") - - master_addr = os.environ.get("MASTER_ADDR", "{}") - master_port = int(os.environ.get("MASTER_PORT", "{}")) - rank = int(os.environ.get("RANK", "{}")) - world_size = int(os.environ.get("WORLD_SIZE", "{}")) - - logger.info( - "extract the Rabit env from cluster :" - " %s, port: %d, rank: %d, word_size: %d ", - master_addr, - master_port, - rank, - world_size, - ) - - return master_addr, master_port, rank, world_size - - -def read_train_data(rank, num_workers, path): - """ - Read file based on the rank of worker. 
- We use the sklearn.iris data for demonstration - You can extend this to read distributed data source like HDFS, HIVE etc - :param rank: the id of each worker - :param num_workers: total number of workers in this cluster - :param path: the input file name or the place to read the data - :return: XGBoost Dmatrix - """ - iris = datasets.load_iris() - x = iris.data - y = iris.target - - start, end = get_range_data(len(x), rank, num_workers) - x = x[start:end, :] - y = y[start:end] - - x = pd.DataFrame(x) - y = pd.DataFrame(y) - dtrain = xgb.DMatrix(data=x, label=y) - - logging.info("Read data from IRIS data source with range from %d to %d", start, end) - - return dtrain - - -def read_predict_data(rank, num_workers, path): - """ - Read file based on the rank of worker. - We use the sklearn.iris data for demonstration - You can extend this to read distributed data source like HDFS, HIVE etc - :param rank: the id of each worker - :param num_workers: total number of workers in this cluster - :param path: the input file name or the place to read the data - :return: XGBoost Dmatrix, and real value - """ - iris = datasets.load_iris() - x = iris.data - y = iris.target - - start, end = get_range_data(len(x), rank, num_workers) - x = x[start:end, :] - y = y[start:end] - x = pd.DataFrame(x) - y = pd.DataFrame(y) - - logging.info("Read data from IRIS datasource with range from %d to %d", start, end) - - predict = xgb.DMatrix(x, label=y) - - return predict, y - - -def get_range_data(num_row, rank, num_workers): - """ - compute the data range based on the input data size and worker id - :param num_row: total number of dataset - :param rank: the worker id - :param num_workers: total number of workers - :return: begin and end range of input matrix - """ - num_per_partition = int(num_row / num_workers) - - x_start = rank * num_per_partition - x_end = (rank + 1) * num_per_partition - - if x_end > num_row: - x_end = num_row - - return x_start, x_end - - -def dump_model(model, type, 
model_path, args): - """ - dump the trained model into local place - you can update this function to store the model into a remote place - :param model: the xgboost trained booster - :param type: model storage type - :param model_path: place to store model - :param args: configuration for model storage - :return: True if the dump process success - """ - if model is None: - raise Exception("fail to get the XGBoost train model") - else: - if type == "local": - joblib.dump(model, model_path) - logging.info("Dump model into local place %s", model_path) - - elif type == "oss": - oss_param = parse_parameters(args.oss_param, ",", ":") - if oss_param is None: - raise Exception("Please config oss parameter to store model") - - oss_param["path"] = args.model_path - dump_model_to_oss(oss_param, model) - logging.info("Dump model into oss place %s", args.model_path) - - return True - - -def read_model(type, model_path, args): - """ - read model from physical storage - :param type: oss or local - :param model_path: place to store the model - :param args: configuration to read model - :return: XGBoost model - """ - - if type == "local": - model = joblib.load(model_path) - logging.info("Read model from local place %s", model_path) - - elif type == "oss": - oss_param = parse_parameters(args.oss_param, ",", ":") - if oss_param is None: - raise Exception("Please config oss to read model") - return False - - oss_param["path"] = args.model_path - - model = read_model_from_oss(oss_param) - logging.info("read model from oss place %s", model_path) - - return model - - -def dump_model_to_oss(oss_parameters, booster): - """ - dump the model to remote OSS disk - :param oss_parameters: oss configuration - :param booster: XGBoost model - :return: True if stored procedure is success - """ - """export model into oss""" - model_fname = os.path.join(tempfile.mkdtemp(), "model") - text_model_fname = os.path.join(tempfile.mkdtemp(), "model.text") - feature_importance = 
os.path.join(tempfile.mkdtemp(), "feature_importance.json") - - oss_path = oss_parameters["path"] - logger.info("---- export model ----") - booster.save_model(model_fname) - booster.dump_model(text_model_fname) # format output model - fscore_dict = booster.get_fscore() - with open(feature_importance, "w") as file: - file.write(json.dumps(fscore_dict)) - logger.info("---- chief dump model successfully!") - - if os.path.exists(model_fname): - logger.info("---- Upload Model start...") - - while oss_path[-1] == "/": - oss_path = oss_path[:-1] - - upload_oss(oss_parameters, model_fname, oss_path) - aux_path = oss_path + "_dir/" - upload_oss(oss_parameters, model_fname, aux_path) - upload_oss(oss_parameters, text_model_fname, aux_path) - upload_oss(oss_parameters, feature_importance, aux_path) - else: - raise Exception("fail to generate model") - return False - - return True - - -def upload_oss(kw, local_file, oss_path): - """ - help function to upload a model to oss - :param kw: OSS parameter - :param local_file: local place of model - :param oss_path: remote place of OSS - :return: True if the procedure is success - """ - if oss_path[-1] == "/": - oss_path = "%s%s" % (oss_path, os.path.basename(local_file)) - - auth = oss2.Auth(kw["access_id"], kw["access_key"]) - bucket = kw["access_bucket"] - bkt = oss2.Bucket(auth=auth, endpoint=kw["endpoint"], bucket_name=bucket) - - try: - bkt.put_object_from_file(key=oss_path, filename=local_file) - logger.info( - "upload %s to %s successfully!" 
% (os.path.abspath(local_file), oss_path) - ) - except Exception(): - raise ValueError( - "upload %s to %s failed" % (os.path.abspath(local_file), oss_path) - ) - - -def read_model_from_oss(kw): - """ - helper function to read a model from oss - :param kw: OSS parameter - :return: XGBoost booster model - """ - auth = oss2.Auth(kw["access_id"], kw["access_key"]) - bucket = kw["access_bucket"] - bkt = oss2.Bucket(auth=auth, endpoint=kw["endpoint"], bucket_name=bucket) - oss_path = kw["path"] - - temp_model_fname = os.path.join(tempfile.mkdtemp(), "local_model") - try: - bkt.get_object_to_file(key=oss_path, filename=temp_model_fname) - logger.info("success to load model from oss %s", oss_path) - except Exception as e: - logging.error("fail to load model: " + e) - raise Exception("fail to load model from oss %s", oss_path) - - bst = xgb.Booster({"nthread": 2}) # init model - - bst.load_model(temp_model_fname) - - return bst - - -def parse_parameters(input, splitter_between, splitter_in): - """ - helper function parse the input parameter - :param input: the string of configuration like key-value pairs - :param splitter_between: the splitter between config for input string - :param splitter_in: the splitter inside config for input string - :return: key-value pair configuration - """ - - ky_pairs = input.split(splitter_between) - - confs = {} - - for kv in ky_pairs: - conf = kv.split(splitter_in) - key = conf[0].strip(" ") - if key == "objective" or key == "endpoint": - value = conf[1].strip("'") + ":" + conf[2].strip("'") - else: - value = conf[1] - - confs[key] = value - return confs diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml deleted file mode 100644 index 9f0c773b1f..0000000000 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml +++ /dev/null @@ -1,40 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - name: xgboost-dist-iris-test-predict 
-spec: - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Predict - - --model_path=autoAI/xgb-opt/2 - - --model_storage_type=oss - - --oss_param=unknown - Worker: - replicas: 2 - restartPolicy: ExitCode - template: - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Predict - - --model_path=autoAI/xgb-opt/2 - - --model_storage_type=oss - - --oss_param=unknown diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml deleted file mode 100644 index 168de9971d..0000000000 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml +++ /dev/null @@ -1,52 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - name: xgboost-dist-iris-test-predict-local -spec: - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - spec: - volumes: - - name: task-pv-storage - persistentVolumeClaim: - claimName: xgboostlocal - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - volumeMounts: - - name: task-pv-storage - mountPath: /tmp/xgboost_model - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Predict - - --model_path=/tmp/xgboost_model/2 - - --model_storage_type=local - Worker: - replicas: 2 - restartPolicy: ExitCode - template: - spec: - volumes: - - name: task-pv-storage - persistentVolumeClaim: - claimName: xgboostlocal - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - volumeMounts: - - name: task-pv-storage - mountPath: /tmp/xgboost_model - ports: - 
- containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Predict - - --model_path=/tmp/xgboost_model/2 - - --model_storage_type=local diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml deleted file mode 100644 index 5824ec056d..0000000000 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml +++ /dev/null @@ -1,42 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - name: xgboost-dist-iris-test-train -spec: - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Train - - --xgboost_parameter=objective:multi:softprob,num_class:3 - - --n_estimators=10 - - --learning_rate=0.1 - - --model_path=/tmp/xgboost-model - - --model_storage_type=local - Worker: - replicas: 2 - restartPolicy: ExitCode - template: - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Train - - --xgboost_parameter="objective:multi:softprob,num_class:3" - - --n_estimators=10 - - --learning_rate=0.1 diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml deleted file mode 100644 index 2798587bb2..0000000000 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml +++ /dev/null @@ -1,58 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - name: xgboost-dist-iris-test-train-local -spec: - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - spec: - volumes: - - name: task-pv-storage - persistentVolumeClaim: - claimName: xgboostlocal - containers: - - 
name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - volumeMounts: - - name: task-pv-storage - mountPath: /tmp/xgboost_model - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Train - - --xgboost_parameter=objective:multi:softprob,num_class:3 - - --n_estimators=10 - - --learning_rate=0.1 - - --model_path=/tmp/xgboost_model/2 - - --model_storage_type=local - Worker: - replicas: 2 - restartPolicy: ExitCode - template: - spec: - volumes: - - name: task-pv-storage - persistentVolumeClaim: - claimName: xgboostlocal - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - volumeMounts: - - name: task-pv-storage - mountPath: /tmp/xgboost_model - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Train - - --xgboost_parameter="objective:multi:softprob,num_class:3" - - --n_estimators=10 - - --learning_rate=0.1 - - --model_path=/tmp/xgboost_model/2 - - --model_storage_type=local diff --git a/examples/xgboost/xgboostjob.yaml b/examples/xgboost/xgboostjob.yaml deleted file mode 100644 index 5824ec056d..0000000000 --- a/examples/xgboost/xgboostjob.yaml +++ /dev/null @@ -1,42 +0,0 @@ -apiVersion: kubeflow.org/v1 -kind: XGBoostJob -metadata: - name: xgboost-dist-iris-test-train -spec: - xgbReplicaSpecs: - Master: - replicas: 1 - restartPolicy: Never - template: - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - ports: - - containerPort: 9991 - name: xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Train - - --xgboost_parameter=objective:multi:softprob,num_class:3 - - --n_estimators=10 - - --learning_rate=0.1 - - --model_path=/tmp/xgboost-model - - --model_storage_type=local - Worker: - replicas: 2 - restartPolicy: ExitCode - template: - spec: - containers: - - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest - ports: - - containerPort: 9991 - name: 
xgboostjob-port - imagePullPolicy: Always - args: - - --job_type=Train - - --xgboost_parameter="objective:multi:softprob,num_class:3" - - --n_estimators=10 - - --learning_rate=0.1 diff --git a/scripts/run-notebook.sh b/hack/e2e-run-notebook.sh similarity index 79% rename from scripts/run-notebook.sh rename to hack/e2e-run-notebook.sh index d6a6f81c61..42bef13a1e 100755 --- a/scripts/run-notebook.sh +++ b/hack/e2e-run-notebook.sh @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# TODO (andreyvelich): Refactor this script for Kubeflow Trainer V2 + # This bash script is used to run the example notebooks set -o errexit @@ -39,12 +41,15 @@ usage() { while getopts "i:o:p:k:n:r:d:h:" opt; do case "$opt" in - i) NOTEBOOK_INPUT="$OPTARG" ;; # -i for notebook input path - o) NOTEBOOK_OUTPUT="$OPTARG" ;; # -o for notebook output path - k) TRAINING_PYTHON_SDK="$OPTARG" ;; # -k for training operator python sdk - n) NAMESPACE="$OPTARG" ;; # -n for kubernetes namespace used by tests - h) usage ;; # -h for help (usage) - *) usage; exit 1 ;; + i) NOTEBOOK_INPUT="$OPTARG" ;; # -i for notebook input path + o) NOTEBOOK_OUTPUT="$OPTARG" ;; # -o for notebook output path + k) TRAINING_PYTHON_SDK="$OPTARG" ;; # -k for training operator python sdk + n) NAMESPACE="$OPTARG" ;; # -n for kubernetes namespace used by tests + h) usage ;; # -h for help (usage) + *) + usage + exit 1 + ;; esac done @@ -55,7 +60,7 @@ fi papermill_cmd="papermill $NOTEBOOK_INPUT $NOTEBOOK_OUTPUT -p training_python_sdk $TRAINING_PYTHON_SDK -p namespace $NAMESPACE" -if ! command -v papermill &> /dev/null; then +if ! command -v papermill &>/dev/null; then echo "Error: papermill is not installed. Please install papermill to proceed." 
exit 1 fi diff --git a/scripts/gha/setup-training-operator.sh b/hack/e2e-setup-cluster.sh similarity index 83% rename from scripts/gha/setup-training-operator.sh rename to hack/e2e-setup-cluster.sh index 8f07c00ace..78fdfe687a 100755 --- a/scripts/gha/setup-training-operator.sh +++ b/hack/e2e-setup-cluster.sh @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# TODO (andreyvelich): Refactor this script for Kubeflow Trainer V2 + set -o errexit set -o nounset set -o pipefail @@ -42,7 +44,7 @@ if [ "${GANG_SCHEDULER_NAME}" = "scheduler-plugins" ]; then echo "Configure gang-scheduling using scheduler-plugins to training-operator" kubectl patch -n kubeflow deployments training-operator --type='json' \ -p='[{"op": "add", "path": "/spec/template/spec/containers/0/command/1", "value": "--gang-scheduler-name=scheduler-plugins"}]' -elif [ "${GANG_SCHEDULER_NAME}" = "volcano" ]; then +elif [ "${GANG_SCHEDULER_NAME}" = "volcano" ]; then VOLCANO_SCHEDULER_VERSION=$(go list -m -f "{{.Version}}" volcano.sh/apis) # patch scheduler first so that it is ready when scheduler-deployment installing finished @@ -57,19 +59,25 @@ fi TIMEOUT=30 until kubectl get pods -n kubeflow | grep training-operator | grep 1/1 || [[ $TIMEOUT -eq 1 ]]; do sleep 10 - TIMEOUT=$(( TIMEOUT - 1 )) + TIMEOUT=$((TIMEOUT - 1)) done if [ "${GANG_SCHEDULER_NAME}" = "scheduler-plugins" ]; then - kubectl wait pods --for=condition=ready -n scheduler-plugins --timeout "${TIMEOUT}s" --all || \ - (kubectl get pods -n scheduler-plugins && kubectl describe pods -n scheduler-plugins; exit 1) + kubectl wait pods --for=condition=ready -n scheduler-plugins --timeout "${TIMEOUT}s" --all || + ( + kubectl get pods -n scheduler-plugins && kubectl describe pods -n scheduler-plugins + exit 1 + ) fi # wait for volcano up if [ "${GANG_SCHEDULER_NAME}" = "volcano" ]; then - kubectl rollout status deployment -n volcano-system volcano-admission --timeout 
"${TIMEOUT}s" && \ - kubectl rollout status deployment -n volcano-system volcano-scheduler --timeout "${TIMEOUT}s" && \ - kubectl rollout status deployment -n volcano-system volcano-controllers --timeout "${TIMEOUT}s" || \ - (kubectl get pods -n volcano-system && kubectl describe pods -n volcano-system; exit 1) + kubectl rollout status deployment -n volcano-system volcano-admission --timeout "${TIMEOUT}s" && + kubectl rollout status deployment -n volcano-system volcano-scheduler --timeout "${TIMEOUT}s" && + kubectl rollout status deployment -n volcano-system volcano-controllers --timeout "${TIMEOUT}s" || + ( + kubectl get pods -n volcano-system && kubectl describe pods -n volcano-system + exit 1 + ) fi kubectl version diff --git a/hack/generate-apidoc.sh b/hack/generate-apidoc.sh deleted file mode 100755 index cd7a15f5f0..0000000000 --- a/hack/generate-apidoc.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -# Copyright 2017 The Kubernetes Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This shell is used to auto generate some useful tools for k8s, such as lister, -# informer, deepcopy, defaulter and so on. - -set -o errexit -set -o nounset -set -o pipefail - -SCRIPT_ROOT=$(dirname ${BASH_SOURCE})/.. 
- -cd ${SCRIPT_ROOT} - -CRD_REF_DOCS_PATH=$(go env GOPATH)/bin -CRD_REF_GEN_VERSION=v0.1.0 -go install github.com/elastic/crd-ref-docs@${CRD_REF_GEN_VERSION} - -${CRD_REF_DOCS_PATH}/crd-ref-docs --log-level DEBUG\ - --source-path ./pkg/apis/kubeflow.org/v1 \ - --config ./docs/api/autogen/config.yaml \ - --templates-dir ./docs/api/autogen/templates \ - --output-path ./docs/api/kubeflow.org_v1_generated.asciidoc \ - --max-depth 30 - -cd - > /dev/null diff --git a/hack/python-sdk-v2/gen-sdk.sh b/hack/python-sdk-v2/gen-sdk.sh index be043eebe6..00f5a75adb 100755 --- a/hack/python-sdk-v2/gen-sdk.sh +++ b/hack/python-sdk-v2/gen-sdk.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/bin/bash # Copyright 2024 The Kubeflow Authors. # diff --git a/hack/python-sdk/gen-sdk.sh b/hack/python-sdk/gen-sdk.sh deleted file mode 100755 index 051543d297..0000000000 --- a/hack/python-sdk/gen-sdk.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2021 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -o errexit -set -o nounset -set -o pipefail - -repo_root="$(dirname "${BASH_SOURCE}")/../.." 
- -SWAGGER_JAR_URL="https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/4.3.1/openapi-generator-cli-4.3.1.jar" -SWAGGER_CODEGEN_JAR="${repo_root}/hack/python-sdk/openapi-generator-cli.jar" -SWAGGER_CODEGEN_CONF="${repo_root}/hack/python-sdk/swagger_config.json" -SDK_OUTPUT_PATH="${repo_root}/sdk/python" -VERSION=1.7.0 -SWAGGER_CODEGEN_FILE="${repo_root}/hack/python-sdk/swagger.json" - -if [ -z "${GOPATH:-}" ]; then - export GOPATH=$(go env GOPATH) -fi - -echo "Generating OpenAPI specification ..." -echo "./hack/update-codegen.sh already help us generate openapi specs ..." - -if [[ ! -f "$SWAGGER_CODEGEN_JAR" ]]; then - echo "Downloading the swagger-codegen JAR package ..." - wget -O "${SWAGGER_CODEGEN_JAR}" ${SWAGGER_JAR_URL} -fi - -echo "Generating swagger file ..." -go run "${repo_root}"/hack/swagger/main.go ${VERSION} >"${SWAGGER_CODEGEN_FILE}" - -echo "Removing previously generated files ..." -rm -rf "${SDK_OUTPUT_PATH}"/docs/KubeflowOrgV1*.md "${SDK_OUTPUT_PATH}"/kubeflow/training/models "${SDK_OUTPUT_PATH}"/kubeflow/training/*.py "${SDK_OUTPUT_PATH}"/test/test_*.py -echo "Generating Python SDK for Training Operator ..." -java -jar "${SWAGGER_CODEGEN_JAR}" generate -i "${repo_root}"/hack/python-sdk/swagger.json -g python --global-property apiTests=false,modelTests=false -o "${SDK_OUTPUT_PATH}" -c "${SWAGGER_CODEGEN_CONF}" - -echo "Kubeflow Training Operator Python SDK is generated successfully to folder ${SDK_OUTPUT_PATH}/." - -echo "Running post-generation script ..." -"${repo_root}"/hack/python-sdk/post_gen.py diff --git a/hack/python-sdk/post_gen.py b/hack/python-sdk/post_gen.py deleted file mode 100755 index 99ede41eac..0000000000 --- a/hack/python-sdk/post_gen.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2021 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script is used for updating generated SDK files. -""" - -import fileinput -import os -import re - -__replacements = [ - ("import kubeflow.training", "from kubeflow.training.models import *"), - ("kubeflow.training.models.v1\/.*.v1.", "V1"), - ("kubeflow.training.models.kubeflow/org/v1/", "kubeflow_org_v1_"), - ("\.kubeflow.org.v1\.", ".KubeflowOrgV1"), -] - -sdk_dir = os.path.abspath(os.path.join(__file__, "../../..", "sdk/python")) - - -def main(): - fix_test_files() - add_imports() - - -def fix_test_files() -> None: - """ - Fix invalid model imports in generated model tests - """ - test_folder_dir = os.path.join(sdk_dir, "test") - test_files = os.listdir(test_folder_dir) - for test_file in test_files: - print(f"Processing file {test_file}") - if test_file.endswith(".py"): - with fileinput.FileInput( - os.path.join(test_folder_dir, test_file), inplace=True - ) as file: - for line in file: - print(_apply_regex(line), end="") - - -def add_imports() -> None: - with open(os.path.join(sdk_dir, "kubeflow/training/__init__.py"), "a") as f: - f.write("from kubeflow.training.api.training_client import TrainingClient\n") - f.write("from kubeflow.training.constants import constants\n") - with open(os.path.join(sdk_dir, "kubeflow/__init__.py"), "a") as f: - f.write("__path__ = __import__('pkgutil').extend_path(__path__, __name__)\n") - - # Add Kubernetes models to proper deserialization of Training models. 
- with open(os.path.join(sdk_dir, "kubeflow/training/models/__init__.py"), "r") as f: - new_lines = [] - for line in f.readlines(): - new_lines.append(line) - if line.startswith("from __future__ import absolute_import"): - new_lines.append("\n") - new_lines.append("# Import Kubernetes models.\n") - new_lines.append("from kubernetes.client import *\n") - with open(os.path.join(sdk_dir, "kubeflow/training/models/__init__.py"), "w") as f: - f.writelines(new_lines) - - -def _apply_regex(input_str: str) -> str: - for pattern, replacement in __replacements: - input_str = re.sub(pattern, replacement, input_str) - return input_str - - -if __name__ == "__main__": - main() diff --git a/hack/python-sdk/swagger.json b/hack/python-sdk/swagger.json deleted file mode 100644 index bb5632167a..0000000000 --- a/hack/python-sdk/swagger.json +++ /dev/null @@ -1,787 +0,0 @@ -{ - "swagger": "2.0", - "info": { - "description": "Python SDK for Kubeflow Training", - "title": "Kubeflow Training SDK", - "version": "v1.7.0" - }, - "paths": {}, - "definitions": { - "kubeflow.org.v1.ElasticPolicy": { - "type": "object", - "properties": { - "maxReplicas": { - "description": "upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null.", - "type": "integer", - "format": "int32" - }, - "maxRestarts": { - "type": "integer", - "format": "int32" - }, - "metrics": { - "description": "Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. See the individual metric source types for more information about how each type of metric must respond. 
If not set, the HPA will not be created.", - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/k8s.io.api.autoscaling.v2.MetricSpec" - } - }, - "minReplicas": { - "description": "minReplicas is the lower limit for the number of replicas to which the training job can scale down. It defaults to null.", - "type": "integer", - "format": "int32" - }, - "nProcPerNode": { - "description": "Number of workers per node; supported values: [auto, cpu, gpu, int]. Deprecated: This API is deprecated in v1.7+ Use .spec.nprocPerNode instead.", - "type": "integer", - "format": "int32" - }, - "rdzvBackend": { - "type": "string" - }, - "rdzvConf": { - "description": "RDZVConf contains additional rendezvous configuration (\u003ckey1\u003e=\u003cvalue1\u003e,\u003ckey2\u003e=\u003cvalue2\u003e,...).", - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.RDZVConf" - } - }, - "rdzvHost": { - "type": "string" - }, - "rdzvId": { - "type": "string" - }, - "rdzvPort": { - "type": "integer", - "format": "int32" - }, - "standalone": { - "description": "Start a local standalone rendezvous backend that is represented by a C10d TCP store on port 29400. Useful when launching single-node, multi-worker job. If specified --rdzv_backend, --rdzv_endpoint, --rdzv_id are auto-assigned; any explicitly set values are ignored.", - "type": "boolean" - } - } - }, - "kubeflow.org.v1.JAXJob": { - "description": "JAXJob Represents a JAXJob resource.", - "type": "object", - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. 
Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "default": {}, - "$ref": "#/definitions/v1.ObjectMeta" - }, - "spec": { - "description": "Specification of the desired state of the JAXJob.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.JAXJobSpec" - }, - "status": { - "description": "Most recently observed status of the JAXJob. Read-only (modified by the system).", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.JobStatus" - } - } - }, - "kubeflow.org.v1.JAXJobList": { - "description": "JAXJobList is a list of JAXJobs.", - "type": "object", - "required": [ - "items" - ], - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "items": { - "description": "List of JAXJobs.", - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.JAXJob" - } - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "description": "Standard list metadata.", - "default": {}, - "$ref": "#/definitions/v1.ListMeta" - } - } - }, - "kubeflow.org.v1.JAXJobSpec": { - "description": "JAXJobSpec is a desired state description of the JAXJob.", - "type": "object", - "required": [ - "runPolicy", - "jaxReplicaSpecs" - ], - "properties": { - "jaxReplicaSpecs": { - "description": "A map of JAXReplicaType (type) to ReplicaSpec (value). Specifies the JAX cluster configuration. For example,\n {\n \"Worker\": JAXReplicaSpec,\n }", - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/kubeflow.org.v1.ReplicaSpec" - } - }, - "runPolicy": { - "description": "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.RunPolicy" - } - } - }, - "kubeflow.org.v1.JobCondition": { - "description": "JobCondition describes the state of the job at a certain point.", - "type": "object", - "required": [ - "type", - "status" - ], - "properties": { - "lastTransitionTime": { - "description": "Last time the condition transitioned from one status to another.", - "$ref": "#/definitions/v1.Time" - }, - "lastUpdateTime": { - "description": "The last time this condition was updated.", - "$ref": "#/definitions/v1.Time" - }, - "message": { - "description": "A human readable message indicating details about the transition.", - "type": "string" - }, - "reason": { - "description": "The reason for the condition's last transition.", - "type": "string" - }, - "status": { - "description": "Status of the condition, one of True, False, Unknown.", - "type": "string", - "default": "" - }, - "type": { - "description": "Type of job condition.", - "type": "string", - "default": "" - } - } - }, - 
"kubeflow.org.v1.JobStatus": { - "description": "JobStatus represents the current observed state of the training Job.", - "type": "object", - "properties": { - "completionTime": { - "description": "Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/v1.Time" - }, - "conditions": { - "description": "Conditions is an array of current observed job conditions.", - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.JobCondition" - } - }, - "lastReconcileTime": { - "description": "Represents last time when the job was reconciled. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/v1.Time" - }, - "replicaStatuses": { - "description": "ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica.", - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/kubeflow.org.v1.ReplicaStatus" - } - }, - "startTime": { - "description": "Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/v1.Time" - } - } - }, - "kubeflow.org.v1.MPIJob": { - "type": "object", - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. 
Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "default": {}, - "$ref": "#/definitions/v1.ObjectMeta" - }, - "spec": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.MPIJobSpec" - }, - "status": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.JobStatus" - } - } - }, - "kubeflow.org.v1.MPIJobList": { - "type": "object", - "required": [ - "items" - ], - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "items": { - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.MPIJob" - } - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "default": {}, - "$ref": "#/definitions/v1.ListMeta" - } - } - }, - "kubeflow.org.v1.MPIJobSpec": { - "type": "object", - "required": [ - "mpiReplicaSpecs" - ], - "properties": { - "cleanPodPolicy": { - "description": "CleanPodPolicy defines the policy that whether to kill pods after the job completes. 
Defaults to None.", - "type": "string" - }, - "mainContainer": { - "description": "MainContainer specifies name of the main container which executes the MPI code.", - "type": "string" - }, - "mpiReplicaSpecs": { - "description": "`MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that specify the MPI replicas to run.", - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/kubeflow.org.v1.ReplicaSpec" - } - }, - "runPolicy": { - "description": "`RunPolicy` encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.RunPolicy" - }, - "slotsPerWorker": { - "description": "Specifies the number of slots per worker used in hostfile. Defaults to 1.", - "type": "integer", - "format": "int32" - } - } - }, - "kubeflow.org.v1.PaddleElasticPolicy": { - "type": "object", - "properties": { - "maxReplicas": { - "description": "upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null.", - "type": "integer", - "format": "int32" - }, - "maxRestarts": { - "description": "MaxRestarts is the limit for restart times of pods in elastic mode.", - "type": "integer", - "format": "int32" - }, - "metrics": { - "description": "Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. See the individual metric source types for more information about how each type of metric must respond. 
If not set, the HPA will not be created.", - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/k8s.io.api.autoscaling.v2.MetricSpec" - } - }, - "minReplicas": { - "description": "minReplicas is the lower limit for the number of replicas to which the training job can scale down. It defaults to null.", - "type": "integer", - "format": "int32" - } - } - }, - "kubeflow.org.v1.PaddleJob": { - "description": "PaddleJob Represents a PaddleJob resource.", - "type": "object", - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "default": {}, - "$ref": "#/definitions/v1.ObjectMeta" - }, - "spec": { - "description": "Specification of the desired state of the PaddleJob.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.PaddleJobSpec" - }, - "status": { - "description": "Most recently observed status of the PaddleJob. Read-only (modified by the system).", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.JobStatus" - } - } - }, - "kubeflow.org.v1.PaddleJobList": { - "description": "PaddleJobList is a list of PaddleJobs.", - "type": "object", - "required": [ - "items" - ], - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "items": { - "description": "List of PaddleJobs.", - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.PaddleJob" - } - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "description": "Standard list metadata.", - "default": {}, - "$ref": "#/definitions/v1.ListMeta" - } - } - }, - "kubeflow.org.v1.PaddleJobSpec": { - "description": "PaddleJobSpec is a desired state description of the PaddleJob.", - "type": "object", - "required": [ - "runPolicy", - "paddleReplicaSpecs" - ], - "properties": { - "elasticPolicy": { - "description": "ElasticPolicy holds the elastic policy for paddle job.", - "$ref": "#/definitions/kubeflow.org.v1.PaddleElasticPolicy" - }, - "paddleReplicaSpecs": { - "description": "A map of PaddleReplicaType (type) to ReplicaSpec (value). Specifies the Paddle cluster configuration. 
For example,\n {\n \"Master\": PaddleReplicaSpec,\n \"Worker\": PaddleReplicaSpec,\n }", - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/kubeflow.org.v1.ReplicaSpec" - } - }, - "runPolicy": { - "description": "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.RunPolicy" - } - } - }, - "kubeflow.org.v1.PyTorchJob": { - "description": "PyTorchJob Represents a PyTorchJob resource.", - "type": "object", - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "default": {}, - "$ref": "#/definitions/v1.ObjectMeta" - }, - "spec": { - "description": "Specification of the desired state of the PyTorchJob.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.PyTorchJobSpec" - }, - "status": { - "description": "Most recently observed status of the PyTorchJob. 
Read-only (modified by the system).", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.JobStatus" - } - } - }, - "kubeflow.org.v1.PyTorchJobList": { - "description": "PyTorchJobList is a list of PyTorchJobs.", - "type": "object", - "required": [ - "items" - ], - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "items": { - "description": "List of PyTorchJobs.", - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.PyTorchJob" - } - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "description": "Standard list metadata.", - "default": {}, - "$ref": "#/definitions/v1.ListMeta" - } - } - }, - "kubeflow.org.v1.PyTorchJobSpec": { - "description": "PyTorchJobSpec is a desired state description of the PyTorchJob.", - "type": "object", - "required": [ - "runPolicy", - "pytorchReplicaSpecs" - ], - "properties": { - "elasticPolicy": { - "$ref": "#/definitions/kubeflow.org.v1.ElasticPolicy" - }, - "nprocPerNode": { - "description": "Number of workers per node; supported values: [auto, cpu, gpu, int]. For more, https://github.com/pytorch/pytorch/blob/26f7f470df64d90e092081e39507e4ac751f55d6/torch/distributed/run.py#L629-L658. Defaults to auto.", - "type": "string" - }, - "pytorchReplicaSpecs": { - "description": "A map of PyTorchReplicaType (type) to ReplicaSpec (value). 
Specifies the PyTorch cluster configuration. For example,\n {\n \"Master\": PyTorchReplicaSpec,\n \"Worker\": PyTorchReplicaSpec,\n }", - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/kubeflow.org.v1.ReplicaSpec" - } - }, - "runPolicy": { - "description": "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.RunPolicy" - } - } - }, - "kubeflow.org.v1.RDZVConf": { - "type": "object", - "properties": { - "key": { - "type": "string" - }, - "value": { - "type": "string" - } - } - }, - "kubeflow.org.v1.ReplicaSpec": { - "description": "ReplicaSpec is a description of the replica", - "type": "object", - "properties": { - "replicas": { - "description": "Replicas is the desired number of replicas of the given template. If unspecified, defaults to 1.", - "type": "integer", - "format": "int32" - }, - "restartPolicy": { - "description": "Restart policy for all replicas within the job. One of Always, OnFailure, Never and ExitCode. Default to Never.", - "type": "string" - }, - "template": { - "description": "Template is the object that describes the pod that will be created for this replica. 
RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec", - "default": {}, - "$ref": "#/definitions/v1.PodTemplateSpec" - } - } - }, - "kubeflow.org.v1.ReplicaStatus": { - "description": "ReplicaStatus represents the current observed state of the replica.", - "type": "object", - "properties": { - "active": { - "description": "The number of actively running pods.", - "type": "integer", - "format": "int32" - }, - "failed": { - "description": "The number of pods which reached phase Failed.", - "type": "integer", - "format": "int32" - }, - "labelSelector": { - "description": "Deprecated: Use Selector instead", - "$ref": "#/definitions/v1.LabelSelector" - }, - "selector": { - "description": "A Selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty Selector matches all objects. A null Selector matches no objects.", - "type": "string" - }, - "succeeded": { - "description": "The number of pods which reached phase Succeeded.", - "type": "integer", - "format": "int32" - } - } - }, - "kubeflow.org.v1.RunPolicy": { - "description": "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - "type": "object", - "properties": { - "activeDeadlineSeconds": { - "description": "Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer.", - "type": "integer", - "format": "int64" - }, - "backoffLimit": { - "description": "Optional number of retries before marking this job failed.", - "type": "integer", - "format": "int32" - }, - "cleanPodPolicy": { - "description": "CleanPodPolicy defines the policy to kill pods after the job completes. Default to None.", - "type": "string" - }, - "managedBy": { - "description": "ManagedBy is used to indicate the controller or entity that manages a job. 
The value must be either an empty, 'kubeflow.org/training-operator' or 'kueue.x-k8s.io/multikueue'. The training-operator reconciles a job which doesn't have this field at all or the field value is the reserved string 'kubeflow.org/training-operator', but delegates reconciling the job with 'kueue.x-k8s.io/multikueue' to the Kueue. The field is immutable.", - "type": "string" - }, - "schedulingPolicy": { - "description": "SchedulingPolicy defines the policy related to scheduling, e.g. gang-scheduling", - "$ref": "#/definitions/kubeflow.org.v1.SchedulingPolicy" - }, - "suspend": { - "description": "suspend specifies whether the Job controller should create Pods or not. If a Job is created with suspend set to true, no Pods are created by the Job controller. If a Job is suspended after creation (i.e. the flag goes from false to true), the Job controller will delete all active Pods and PodGroups associated with this Job. Users must design their workload to gracefully handle this. Suspending a Job will reset the StartTime field of the Job.\n\nDefaults to false.", - "type": "boolean" - }, - "ttlSecondsAfterFinished": { - "description": "TTLSecondsAfterFinished is the TTL to clean up jobs. It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. 
Default to infinite.", - "type": "integer", - "format": "int32" - } - } - }, - "kubeflow.org.v1.SchedulingPolicy": { - "description": "SchedulingPolicy encapsulates various scheduling policies of the distributed training job, for example `minAvailable` for gang-scheduling.", - "type": "object", - "properties": { - "minAvailable": { - "type": "integer", - "format": "int32" - }, - "minResources": { - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/.Quantity" - } - }, - "priorityClass": { - "type": "string" - }, - "queue": { - "type": "string" - }, - "scheduleTimeoutSeconds": { - "type": "integer", - "format": "int32" - } - } - }, - "kubeflow.org.v1.TFJob": { - "description": "TFJob represents a TFJob resource.", - "type": "object", - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "default": {}, - "$ref": "#/definitions/v1.ObjectMeta" - }, - "spec": { - "description": "Specification of the desired state of the TFJob.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.TFJobSpec" - }, - "status": { - "description": "Most recently observed status of the TFJob. Populated by the system. 
Read-only.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.JobStatus" - } - } - }, - "kubeflow.org.v1.TFJobList": { - "description": "TFJobList is a list of TFJobs.", - "type": "object", - "required": [ - "items" - ], - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "items": { - "description": "List of TFJobs.", - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.TFJob" - } - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "description": "Standard list metadata.", - "default": {}, - "$ref": "#/definitions/v1.ListMeta" - } - } - }, - "kubeflow.org.v1.TFJobSpec": { - "description": "TFJobSpec is a desired state description of the TFJob.", - "type": "object", - "required": [ - "runPolicy", - "tfReplicaSpecs" - ], - "properties": { - "enableDynamicWorker": { - "description": "A switch to enable dynamic worker", - "type": "boolean" - }, - "runPolicy": { - "description": "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.RunPolicy" - }, - "successPolicy": { - "description": "SuccessPolicy defines the policy to mark the TFJob as succeeded. 
Default to \"\", using the default rules.", - "type": "string" - }, - "tfReplicaSpecs": { - "description": "A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. For example,\n {\n \"PS\": ReplicaSpec,\n \"Worker\": ReplicaSpec,\n }", - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/kubeflow.org.v1.ReplicaSpec" - } - } - } - }, - "kubeflow.org.v1.XGBoostJob": { - "description": "XGBoostJob is the Schema for the xgboostjobs API", - "type": "object", - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "default": {}, - "$ref": "#/definitions/v1.ObjectMeta" - }, - "spec": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.XGBoostJobSpec" - }, - "status": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.JobStatus" - } - } - }, - "kubeflow.org.v1.XGBoostJobList": { - "description": "XGBoostJobList contains a list of XGBoostJob", - "type": "object", - "required": [ - "items" - ], - "properties": { - "apiVersion": { - "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - "type": "string" - }, - "items": { - "type": "array", - "items": { - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.XGBoostJob" - } - }, - "kind": { - "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - "type": "string" - }, - "metadata": { - "default": {}, - "$ref": "#/definitions/v1.ListMeta" - } - } - }, - "kubeflow.org.v1.XGBoostJobSpec": { - "description": "XGBoostJobSpec defines the desired state of XGBoostJob", - "type": "object", - "required": [ - "runPolicy", - "xgbReplicaSpecs" - ], - "properties": { - "runPolicy": { - "description": "INSERT ADDITIONAL SPEC FIELDS - desired state of cluster Important: Run \"make\" to regenerate code after modifying this file", - "default": {}, - "$ref": "#/definitions/kubeflow.org.v1.RunPolicy" - }, - "xgbReplicaSpecs": { - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/kubeflow.org.v1.ReplicaSpec" - } - } - } - } - } -} diff --git a/hack/python-sdk/swagger_config.json b/hack/python-sdk/swagger_config.json deleted file mode 100644 index feac4b1f42..0000000000 --- a/hack/python-sdk/swagger_config.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "packageName": "kubeflow.training", - "projectName": "training", - "packageVersion": "1.7.0", - "typeMappings": { - "V1Time": "datetime" - } -} diff --git a/hack/scripts/cleanup_clusters.sh b/hack/scripts/cleanup_clusters.sh deleted file mode 100755 index c2d8eaf6ff..0000000000 --- a/hack/scripts/cleanup_clusters.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# Copyright 2018 The Kubeflow Authors. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Helper script to remove all resources in a kubernetes cluster created by the CRD. - -kubectl delete service --selector='kubeflow.org=' -kubectl delete jobs --selector='kubeflow.org=' -kubectl delete pods --selector='kubeflow.org=' diff --git a/hack/scripts/update-changelog.sh b/hack/scripts/update-changelog.sh deleted file mode 100755 index 207b29c60b..0000000000 --- a/hack/scripts/update-changelog.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash - -# Copyright 2018 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Update CHANGELOG.md using github_changelog_generator. -# -# The script will compute changes between release tags. So make sure there is -# a release tag corresponding to the release you want to compute the changes -# for. -set -o errexit -set -o nounset -set -o pipefail - -GITHUB_TOKEN=${GITHUB_TOKEN:-"NO"} - -SCRIPT_ROOT=$(dirname ${BASH_SOURCE})/../.. 
- -cd ${SCRIPT_ROOT} - -if [ "${GITHUB_TOKEN}" == "NO" ] -then - echo "Environment variable GITHUB_TOKEN is not set." - exit 1 -fi - -github_changelog_generator -t ${GITHUB_TOKEN} -u kubeflow -p training-operator \ - --exclude-labels community/discussion,cmmunity/question,duplicate,question,invalid,wontfix \ - --bug-labels kind/bug,problems/bug \ - --enhancement-labels improvement/optimization,kind/enhancement,improvement/enhancement,addition/feature,kind/feature \ - --enhancement-label "**Features and improvements:**" \ - --max-issues=100 - -cd - > /dev/null diff --git a/hack/swagger-v2/main.go b/hack/swagger-v2/main.go index 069656a0ea..e4f7230ae4 100644 --- a/hack/swagger-v2/main.go +++ b/hack/swagger-v2/main.go @@ -25,6 +25,8 @@ import ( "k8s.io/kube-openapi/pkg/common" "k8s.io/kube-openapi/pkg/validation/spec" + _ "k8s.io/code-generator" + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" ) diff --git a/hack/swagger/go.mod b/hack/swagger/go.mod deleted file mode 100644 index 2871baf675..0000000000 --- a/hack/swagger/go.mod +++ /dev/null @@ -1,44 +0,0 @@ -module github.com/kubeflow/training-operator/hack/swagger - -go 1.23 - -require ( - github.com/kubeflow/training-operator v0.0.0-00010101000000-000000000000 - k8s.io/klog/v2 v2.130.1 - k8s.io/kube-openapi v0.0.0-20240430033511-f0e62f92d13f -) - -replace github.com/kubeflow/training-operator => ../../ - -require ( - github.com/emicklei/go-restful/v3 v3.12.1 // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect - github.com/go-logr/logr v1.4.2 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.21.0 // indirect - github.com/go-openapi/swag v0.23.0 // indirect - github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/protobuf v1.5.4 // indirect - github.com/google/gnostic-models v0.6.8 // indirect - github.com/google/gofuzz v1.2.0 // indirect - github.com/josharian/intern v1.0.0 // indirect - 
github.com/json-iterator/go v1.1.12 // indirect - github.com/mailru/easyjson v0.7.7 // indirect - github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/sirupsen/logrus v1.9.3 // indirect - github.com/x448/float16 v0.8.4 // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/text v0.19.0 // indirect - google.golang.org/protobuf v1.35.1 // indirect - gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.31.3 // indirect - k8s.io/apimachinery v0.31.3 // indirect - k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect - sigs.k8s.io/controller-runtime v0.19.1 // indirect - sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect -) diff --git a/hack/swagger/go.sum b/hack/swagger/go.sum deleted file mode 100644 index 1ac64d041c..0000000000 --- a/hack/swagger/go.sum +++ /dev/null @@ -1,136 +0,0 @@ -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= -github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod 
h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= -github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= -github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= -github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= -github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= -github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= -github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= -github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= -github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 h1:5iH8iuqE5apketRbSFBy+X1V0o+l+8NF1avt4HWl7cA= -github.com/google/pprof 
v0.0.0-20240827171923-fa2c70bbbfe5/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= -github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= -github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= -github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= -github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= -github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/onsi/ginkgo/v2 v2.20.1 h1:YlVIbqct+ZmnEph770q9Q7NVAz4wwIiVNahee6JyUzo= -github.com/onsi/ginkgo/v2 v2.20.1/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI= -github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= -github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= 
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= -github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/mod 
v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/tools 
v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= -golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= -google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= -gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.31.3 h1:umzm5o8lFbdN/hIXbrK9oRpOproJO62CV1zqxXrLgk8= -k8s.io/api v0.31.3/go.mod h1:UJrkIp9pnMOI9K2nlL6vwpxRzzEX5sWgn8kGQe92kCE= -k8s.io/apimachinery v0.31.3 h1:6l0WhcYgasZ/wk9ktLq5vLaoXJJr5ts6lkaQzgeYPq4= -k8s.io/apimachinery v0.31.3/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= -k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= -k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20240430033511-f0e62f92d13f h1:0LQagt0gDpKqvIkAMPaRGcXawNMouPECM1+F9BVxEaM= -k8s.io/kube-openapi v0.0.0-20240430033511-f0e62f92d13f/go.mod h1:S9tOR0FxgyusSNR+MboCuiDpVWkAifZvaYI1Q2ubgro= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -sigs.k8s.io/controller-runtime v0.19.1 h1:Son+Q40+Be3QWb+niBXAg2vFiYWolDjjRfO8hn/cxOk= -sigs.k8s.io/controller-runtime v0.19.1/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= -sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= -sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/hack/swagger/main.go b/hack/swagger/main.go deleted file mode 100644 index fef68c3776..0000000000 --- a/hack/swagger/main.go +++ /dev/null @@ -1,84 +0,0 @@ -/* -Copyright 
2021 kubeflow.org. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package main - -import ( - "encoding/json" - "fmt" - "os" - "strings" - - "k8s.io/klog/v2" - "k8s.io/kube-openapi/pkg/common" - "k8s.io/kube-openapi/pkg/validation/spec" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// Generate OpenAPI spec definitions for API resources -func main() { - if len(os.Args) <= 1 { - klog.Fatal("Supply a version") - } - version := os.Args[1] - if !strings.HasPrefix(version, "v") { - version = "v" + version - } - var oAPIDefs = map[string]common.OpenAPIDefinition{} - defs := spec.Definitions{} - - refCallback := func(name string) spec.Ref { - return spec.MustCreateRef("#/definitions/" + common.EscapeJsonPointer(swaggify(name))) - } - - for k, v := range kubeflowv1.GetOpenAPIDefinitions(refCallback) { - oAPIDefs[k] = v - } - - for defName, val := range oAPIDefs { - defs[swaggify(defName)] = val.Schema - } - swagger := spec.Swagger{ - SwaggerProps: spec.SwaggerProps{ - Swagger: "2.0", - Definitions: defs, - Paths: &spec.Paths{Paths: map[string]spec.PathItem{}}, - Info: &spec.Info{ - InfoProps: spec.InfoProps{ - Title: "Kubeflow Training SDK", - Description: "Python SDK for Kubeflow Training", - Version: version, - }, - }, - }, - } - jsonBytes, err := json.MarshalIndent(swagger, "", " ") - if err != nil { - klog.Fatal(err.Error()) - } - fmt.Println(string(jsonBytes)) -} - -func swaggify(name string) string { - name = strings.Replace(name, 
"github.com/kubeflow/training-operator/pkg/apis/", "", -1) - name = strings.Replace(name, "k8s.io/api/core/", "", -1) - name = strings.Replace(name, "k8s.io/apimachinery/pkg/apis/meta/", "", -1) - name = strings.Replace(name, "k8s.io/apimachinery/pkg/api/resource", "", -1) - name = strings.Replace(name, "k8s.io/apimachinery/pkg/", "", -1) - name = strings.Replace(name, "/", ".", -1) - return name -} diff --git a/hack/update-codegen.sh b/hack/update-codegen.sh index 2abaea0868..5d09fc4491 100755 --- a/hack/update-codegen.sh +++ b/hack/update-codegen.sh @@ -1,5 +1,19 @@ #!/bin/bash +# Copyright 2024 The Kubeflow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # This shell is used to auto generate some useful tools for k8s, such as clientset, lister, informer and so on. # We don't use this tool to generate deepcopy because kubebuilder (controller-tools) has covered that part. @@ -19,13 +33,13 @@ source "${CODEGEN_PKG}/kube_codegen.sh" echo ">> Using ${CODEGEN_PKG}" # Generating deepcopy and defaults. -echo "Generating deepcopy and defaults for kubeflow.org/v1 and kubeflow.org/v2alpha1" +echo "Generating deepcopy and defaults for kubeflow.org/v2alpha1" kube::codegen::gen_helpers \ --boilerplate "${TRAINING_OPERATOR_ROOT}/hack/boilerplate/boilerplate.go.txt" \ "${TRAINING_OPERATOR_ROOT}/pkg/apis" -# Generate clients for Training Operator V1 and V2 -echo "Generating clients for kubeflow.org/v1 and kubeflow.org/v2alpha1" +# Generate clients for Kubeflow Trainer V2. 
+echo "Generating clients for kubeflow.org/v2alpha1" kube::codegen::gen_client \ --boilerplate "${TRAINING_OPERATOR_ROOT}/hack/boilerplate/boilerplate.go.txt" \ --output-dir "${TRAINING_OPERATOR_ROOT}/pkg/client" \ @@ -38,15 +52,6 @@ kube::codegen::gen_client \ OPENAPI_PKG=$(go list -m -mod=readonly -f "{{.Dir}}" k8s.io/kube-openapi) echo ">> Using ${OPENAPI_PKG}" -echo "Generating OpenAPI specification for kubeflow.org/v1" -go run ${OPENAPI_PKG}/cmd/openapi-gen \ - --go-header-file "${TRAINING_OPERATOR_ROOT}/hack/boilerplate/boilerplate.go.txt" \ - --output-pkg "${TRAINING_OPERATOR_PKG}/pkg/apis/kubeflow.org/v1" \ - --output-dir "${TRAINING_OPERATOR_ROOT}/pkg/apis/kubeflow.org/v1" \ - --output-file "zz_generated.openapi.go" \ - --report-filename "${TRAINING_OPERATOR_ROOT}/hack/violation_exception_v1.list" \ - "${TRAINING_OPERATOR_ROOT}/pkg/apis/kubeflow.org/v1" - echo "Generating OpenAPI specification for kubeflow.org/v2alpha1" go run ${OPENAPI_PKG}/cmd/openapi-gen \ --go-header-file "${TRAINING_OPERATOR_ROOT}/hack/boilerplate/boilerplate.go.txt" \ @@ -56,6 +61,6 @@ go run ${OPENAPI_PKG}/cmd/openapi-gen \ --report-filename "${TRAINING_OPERATOR_ROOT}/hack/violation_exception_v2alpha1.list" \ "${TRAINING_OPERATOR_ROOT}/pkg/apis/kubeflow.org/v2alpha1" -# Generating OpenAPI Swagger for Training Operator V2. +# Generating OpenAPI Swagger for Kubeflow Trainer V2. echo "Generate OpenAPI Swagger for kubeflow.org/v2alpha1" go run hack/swagger-v2/main.go >api.v2/openapi-spec/swagger.json diff --git a/hack/verify-codegen.sh b/hack/verify-codegen.sh index 9cc02a5a4a..c8f3ae33b0 100755 --- a/hack/verify-codegen.sh +++ b/hack/verify-codegen.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2017 The Kubernetes Authors. +# Copyright 2024 The Kubeflow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,14 +18,12 @@ set -o errexit set -o nounset set -o pipefail -SCRIPT_ROOT=$(dirname "${BASH_SOURCE}")/.. - -DIFFROOT="${SCRIPT_ROOT}/pkg" -TMP_DIFFROOT="${SCRIPT_ROOT}/_tmp/pkg" -_tmp="${SCRIPT_ROOT}/_tmp" +CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}") +DIFFROOT="${CURRENT_DIR}" +TMP_DIFFROOT="$(mktemp -d -t "$(basename "$0").XXXXXX")" cleanup() { - rm -rf "${_tmp}" + rm -rf "${TMP_DIFFROOT}" } trap "cleanup" EXIT SIGINT @@ -34,15 +32,19 @@ cleanup mkdir -p "${TMP_DIFFROOT}" cp -a "${DIFFROOT}"/* "${TMP_DIFFROOT}" -"${SCRIPT_ROOT}/hack/update-codegen.sh" +echo $TMP_DIFFROOT +echo $DIFFROOT + +# Generate files. +make generate + echo "diffing ${DIFFROOT} against freshly generated codegen" ret=0 -diff -Naupr "${DIFFROOT}" "${TMP_DIFFROOT}" || ret=$? -cp -a "${TMP_DIFFROOT}"/* "${DIFFROOT}" -if [[ $ret -eq 0 ]] -then +diff -Naupr -x.gitignore "${DIFFROOT}" "${TMP_DIFFROOT}" || ret=$? + +if [[ $ret -eq 0 ]]; then echo "${DIFFROOT} up to date." else - echo "${DIFFROOT} is out of date. Please run hack/update-codegen.sh" + echo "${DIFFROOT} is out of date. 
Please run make generate" exit 1 fi diff --git a/hack/violation_exception_v1.list b/hack/violation_exception_v1.list deleted file mode 100644 index de1fdc3ab4..0000000000 --- a/hack/violation_exception_v1.list +++ /dev/null @@ -1,6 +0,0 @@ -API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1,ElasticPolicy,Metrics -API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1,ElasticPolicy,RDZVConf -API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1,JobStatus,Conditions -API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1,PaddleElasticPolicy,Metrics -API rule violation: names_match,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1,ElasticPolicy,RDZVID -API rule violation: names_match,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1,PyTorchJobSpec,PyTorchReplicaSpecs diff --git a/manifests/base/crds/kubeflow.org_jaxjobs.yaml b/manifests/base/crds/kubeflow.org_jaxjobs.yaml deleted file mode 100644 index af56195604..0000000000 --- a/manifests/base/crds/kubeflow.org_jaxjobs.yaml +++ /dev/null @@ -1,7901 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.5 - name: jaxjobs.kubeflow.org -spec: - group: kubeflow.org - names: - kind: JAXJob - listKind: JAXJobList - plural: jaxjobs - singular: jaxjob - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .status.conditions[-1:].type - name: State - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1 - schema: - openAPIV3Schema: - description: JAXJob Represents a JAXJob resource. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. 
- Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of the desired state of the JAXJob. - properties: - jaxReplicaSpecs: - additionalProperties: - description: ReplicaSpec is a description of the replica - properties: - replicas: - description: |- - Replicas is the desired number of replicas of the given template. - If unspecified, defaults to 1. - format: int32 - type: integer - restartPolicy: - description: |- - Restart policy for all replicas within the job. - One of Always, OnFailure, Never and ExitCode. - Default to Never. - type: string - template: - description: |- - Template is the object that describes the pod that - will be created for this replica. RestartPolicy in PodTemplateSpec - will be overide by RestartPolicy in ReplicaSpec - properties: - metadata: - description: |- - Standard object's metadata. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - Specification of the desired behavior of the pod. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status - properties: - activeDeadlineSeconds: - description: |- - Optional duration in seconds the pod may be active on the node relative to - StartTime before the system will actively try to mark it failed and kill associated containers. - Value must be a positive integer. - format: int64 - type: integer - affinity: - description: If specified, the pod's scheduling constraints - properties: - nodeAffinity: - description: Describes node affinity scheduling - rules for the pod. - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: |- - An empty preferred scheduling term matches all objects with implicit weight 0 - (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). - properties: - preference: - description: A node selector term, associated - with the corresponding weight. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. 
If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - weight: - description: Weight associated with matching - the corresponding nodeSelectorTerm, - in the range 1-100. 
- format: int32 - type: integer - required: - - preference - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to an update), the system - may or may not try to eventually evict the pod from its node. - properties: - nodeSelectorTerms: - description: Required. A list of node selector - terms. The terms are ORed. - items: - description: |- - A null or empty node selector term matches no objects. The requirements of - them are ANDed. - The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-type: atomic - required: - - nodeSelectorTerms - type: object - x-kubernetes-map-type: atomic - type: object - podAffinity: - description: Describes pod affinity scheduling rules - (e.g. co-locate this pod in the same node, zone, - etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. 
- for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. 
- type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. 
- type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. 
- format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. 
- The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - podAntiAffinity: - description: Describes pod anti-affinity scheduling - rules (e.g. avoid putting this pod in the same - node, zone, etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the anti-affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. 
- type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. 
- The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the anti-affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the anti-affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. 
- properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - type: object - automountServiceAccountToken: - description: AutomountServiceAccountToken indicates - whether a service account token should be automatically - mounted. - type: boolean - containers: - description: |- - List of containers belonging to the pod. - Containers cannot currently be added or removed. - There must be at least one container in a Pod. - Cannot be updated. - items: - description: A single application container that you - want to run within a pod. 
- properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. 
- type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. 
- More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. 
- format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. 
- format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. 
- format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. 
- For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. 
- x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. 
- type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - dnsConfig: - description: |- - Specifies the DNS parameters of a pod. - Parameters specified here will be merged to the generated DNS - configuration based on DNSPolicy. - properties: - nameservers: - description: |- - A list of DNS name server IP addresses. - This will be appended to the base nameservers generated from DNSPolicy. - Duplicated nameservers will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - options: - description: |- - A list of DNS resolver options. - This will be merged with the base options generated from DNSPolicy. - Duplicated entries will be removed. Resolution options given in Options - will override those that appear in the base DNSPolicy. 
- items: - description: PodDNSConfigOption defines DNS resolver - options of a pod. - properties: - name: - description: Required. - type: string - value: - type: string - type: object - type: array - x-kubernetes-list-type: atomic - searches: - description: |- - A list of DNS search domains for host-name lookup. - This will be appended to the base search paths generated from DNSPolicy. - Duplicated search paths will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - dnsPolicy: - description: |- - Set DNS policy for the pod. - Defaults to "ClusterFirst". - Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. - DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. - To have DNS options set along with hostNetwork, you have to specify DNS policy - explicitly to 'ClusterFirstWithHostNet'. - type: string - enableServiceLinks: - description: |- - EnableServiceLinks indicates whether information about services should be injected into pod's - environment variables, matching the syntax of Docker links. - Optional: Defaults to true. - type: boolean - ephemeralContainers: - description: |- - List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing - pod to perform user-initiated actions such as debugging. This list cannot be specified when - creating a pod, and it cannot be modified by updating the pod spec. In order to add an - ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. - items: - description: |- - An EphemeralContainer is a temporary container that you may add to an existing Pod for - user-initiated activities such as debugging. Ephemeral containers have no resource or - scheduling guarantees, and they will not be restarted when they exit or when a Pod is - removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the - Pod to exceed its resource allocation. 
- properties: - args: - description: |- - Arguments to the entrypoint. - The image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. 
- type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: Lifecycle is not allowed for ephemeral - containers. 
- properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. 
- Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. 
- properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. 
- properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the ephemeral container specified as a DNS_LABEL. - This name must be unique among all containers, init containers and ephemeral containers. - type: string - ports: - description: Ports are not allowed for ephemeral - containers. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. 
Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. 
You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources - already allocated to the pod. - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. 
- - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - Restart policy for the container to manage the restart behavior of each - container within a pod. 
- This may only be set for init containers. You cannot set this field on - ephemeral containers. - type: string - securityContext: - description: |- - Optional: SecurityContext defines the security options the ephemeral container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - targetContainerName: - description: |- - If set, the name of the container from PodSpec that this ephemeral container targets. - The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. - If not set then the ephemeral container uses the namespaces configured in the Pod spec. - - The container runtime must implement support for this feature. - type: string - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. 
- type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). 
- type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. - - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - hostAliases: - description: |- - HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts - file if specified. - items: - description: |- - HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the - pod's hosts file. 
- properties: - hostnames: - description: Hostnames for the above IP address. - items: - type: string - type: array - x-kubernetes-list-type: atomic - ip: - description: IP address of the host file entry. - type: string - required: - - ip - type: object - type: array - x-kubernetes-list-map-keys: - - ip - x-kubernetes-list-type: map - hostIPC: - description: |- - Use the host's ipc namespace. - Optional: Default to false. - type: boolean - hostNetwork: - description: |- - Host networking requested for this pod. Use the host's network namespace. - If this option is set, the ports that will be used must be specified. - Default to false. - type: boolean - hostPID: - description: |- - Use the host's pid namespace. - Optional: Default to false. - type: boolean - hostUsers: - description: |- - Use the host's user namespace. - Optional: Default to true. - If set to true or not present, the pod will be run in the host user namespace, useful - for when the pod needs a feature only available to the host user namespace, such as - loading a kernel module with CAP_SYS_MODULE. - When set to false, a new userns is created for the pod. - type: boolean - hostname: - description: |- - Specifies the hostname of the Pod - If not specified, the pod's hostname will be set to a system-defined value. - type: string - imagePullSecrets: - description: |- - ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. - If specified, these secrets will be passed to individual puller implementations for them to use. - More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod - items: - description: |- - LocalObjectReference contains enough information to let you locate the - referenced object inside the same namespace. - properties: - name: - default: "" - description: |- - Name of the referent. 
- This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - initContainers: - description: |- - List of initialization containers belonging to the pod. - Init containers are executed in order prior to containers being started. If any - init container fails, the pod is considered to have failed and is handled according - to its restartPolicy. The name for an init container or normal container must be - unique among all containers. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. 
- items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. 
All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. 
- type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. 
The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. 
- type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. 
Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. 
Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. - For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. 
- type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - nodeName: - description: |- - NodeName indicates in which node this pod is scheduled. - If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName. - Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod. - This field should not be used to express a desire for the pod to be scheduled on a specific node. - https://kubernetes. - type: string - nodeSelector: - additionalProperties: - type: string - description: |- - NodeSelector is a selector which must be true for the pod to fit on a node. - Selector which must match a node's labels for the pod to be scheduled on that node. 
- More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ - type: object - x-kubernetes-map-type: atomic - os: - description: |- - Specifies the OS of the containers in the pod. - Some pod and container fields are restricted if this is set. - - If the OS field is set to linux, the following fields must be unset: - -securityContext.windowsOptions - - If the OS field is set to windows, following fields must be unset: - - spec.hostPID - - spec.hostIPC - - spec.hostUsers - - spec.securityContext.appArmorProfile - - spec.securityContext. - properties: - name: - description: |- - Name is the name of the operating system. The currently supported values are linux and windows. - Additional value may be defined in future and can be one of: - https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration - Clients should expect to handle additional values and treat unrecognized values in this field as os: null - type: string - required: - - name - type: object - overhead: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. - This field will be autopopulated at admission time by the RuntimeClass admission controller. If - the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. - The RuntimeClass admission controller will reject Pod create requests which have the overhead already - set. - type: object - preemptionPolicy: - description: |- - PreemptionPolicy is the Policy for preempting pods with lower priority. - One of Never, PreemptLowerPriority. - Defaults to PreemptLowerPriority if unset. - type: string - priority: - description: |- - The priority value. 
Various system components use this field to find the - priority of the pod. When Priority Admission Controller is enabled, it - prevents users from setting this field. The admission controller populates - this field from PriorityClassName. - The higher the value, the higher the priority. - format: int32 - type: integer - priorityClassName: - description: |- - If specified, indicates the pod's priority. "system-node-critical" and - "system-cluster-critical" are two special keywords which indicate the - highest priorities with the former being the highest priority. Any other - name must be defined by creating a PriorityClass object with that name. - If not specified, the pod priority will be default or zero if there is no - default. - type: string - readinessGates: - description: |- - If specified, all readiness gates will be evaluated for pod readiness. - A pod is ready when all its containers are ready AND - all conditions specified in the readiness gates have status equal to "True" - More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates - items: - description: PodReadinessGate contains the reference - to a pod condition - properties: - conditionType: - description: ConditionType refers to a condition - in the pod's condition list with matching type. - type: string - required: - - conditionType - type: object - type: array - x-kubernetes-list-type: atomic - resourceClaims: - description: |- - ResourceClaims defines which ResourceClaims must be allocated - and reserved before the Pod is allowed to start. The resources - will be made available to those containers which consume them - by name. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. - items: - description: |- - PodResourceClaim references exactly one ResourceClaim, either directly - or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim - for the pod. 
- - It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. - Containers that need access to the ResourceClaim reference it with this name. - properties: - name: - description: |- - Name uniquely identifies this resource claim inside the pod. - This must be a DNS_LABEL. - type: string - resourceClaimName: - description: |- - ResourceClaimName is the name of a ResourceClaim object in the same - namespace as this pod. - - Exactly one of ResourceClaimName and ResourceClaimTemplateName must - be set. - type: string - resourceClaimTemplateName: - description: |- - ResourceClaimTemplateName is the name of a ResourceClaimTemplate - object in the same namespace as this pod. - - The template will be used to create a new ResourceClaim, which will - be bound to this pod. When this pod is deleted, the ResourceClaim - will also be deleted. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - restartPolicy: - description: |- - Restart policy for all containers within the pod. - One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. - Default to Always. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy - type: string - runtimeClassName: - description: |- - RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used - to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. - If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an - empty definition that uses the default runtime handler. - More info: https://git.k8s. - type: string - schedulerName: - description: |- - If specified, the pod will be dispatched by specified scheduler. - If not specified, the pod will be dispatched by default scheduler. 
- type: string - schedulingGates: - description: |- - SchedulingGates is an opaque list of values that if specified will block scheduling the pod. - If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the - scheduler will not attempt to schedule the pod. - - SchedulingGates can only be set at pod creation time, and be removed only afterwards. - items: - description: PodSchedulingGate is associated to a - Pod to guard its scheduling. - properties: - name: - description: |- - Name of the scheduling gate. - Each scheduling gate must have a unique name field. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - securityContext: - description: |- - SecurityContext holds pod-level security attributes and common container settings. - Optional: Defaults to empty. See type description for default values of each field. - properties: - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - fsGroup: - description: |- - A special supplemental group that applies to all containers in a pod. - Some volume types allow the Kubelet to change the ownership of that volume - to be owned by the pod: - - 1. The owning GID will be the FSGroup - 2. 
The setgid bit is set (new files created in the volume will be owned by FSGroup) - 3. - format: int64 - type: integer - fsGroupChangePolicy: - description: |- - fsGroupChangePolicy defines behavior of changing ownership and permission of the volume - before being exposed inside Pod. This field will only apply to - volume types which support fsGroup based ownership(and permissions). - It will have no effect on ephemeral volume types such as: secret, configmaps - and emptydir. - Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. - type: string - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in SecurityContext. - type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to all containers. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in SecurityContext. 
If set in - both SecurityContext and PodSecurityContext, the value specified in SecurityContext - takes precedence for that container. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that - applies to the container. - type: string - role: - description: Role is a SELinux role label that - applies to the container. - type: string - type: - description: Type is a SELinux type label that - applies to the container. - type: string - user: - description: User is a SELinux user label that - applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. - Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - supplementalGroups: - description: |- - A list of groups applied to the first process run in each container, in - addition to the container's primary GID and fsGroup (if specified). If - the SupplementalGroupsPolicy feature is enabled, the - supplementalGroupsPolicy field determines whether these are in addition - to or instead of any group memberships defined in the container image. 
- items: - format: int64 - type: integer - type: array - x-kubernetes-list-type: atomic - supplementalGroupsPolicy: - description: |- - Defines how supplemental groups of the first container processes are calculated. - Valid values are "Merge" and "Strict". If not specified, "Merge" is used. - (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled - and the container runtime must implement support for this feature. - Note that this field cannot be set when spec.os.name is windows. - type: string - sysctls: - description: |- - Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported - sysctls (by the container runtime) might fail to launch. - Note that this field cannot be set when spec.os.name is windows. - items: - description: Sysctl defines a kernel parameter - to be set - properties: - name: - description: Name of a property to set - type: string - value: - description: Value of a property to set - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options within a container's SecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. 
- All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - serviceAccount: - description: |- - DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. - Deprecated: Use serviceAccountName instead. - type: string - serviceAccountName: - description: |- - ServiceAccountName is the name of the ServiceAccount to use to run this pod. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ - type: string - setHostnameAsFQDN: - description: |- - If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). - In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). - type: boolean - shareProcessNamespace: - description: |- - Share a single process namespace between all of the containers in a pod. - When this is set containers will be able to view and signal processes from other containers - in the same pod, and the first process in each container will not be assigned PID 1. - HostPID and ShareProcessNamespace cannot both be set. - Optional: Default to false. - type: boolean - subdomain: - description: |- - If specified, the fully qualified Pod hostname will be "...svc.". - If not specified, the pod will not have a domainname at all. 
- type: string - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - If this value is nil, the default grace period will be used instead. - format: int64 - type: integer - tolerations: - description: If specified, the pod's tolerations. - items: - description: |- - The pod this Toleration is attached to tolerates any taint that matches - the triple using the matching operator . - properties: - effect: - description: |- - Effect indicates the taint effect to match. Empty means match all taint effects. - When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: |- - Key is the taint key that the toleration applies to. Empty means match all taint keys. - If the key is empty, operator must be Exists; this combination means to match all values and all keys. - type: string - operator: - description: |- - Operator represents a key's relationship to the value. - Valid operators are Exists and Equal. Defaults to Equal. - Exists is equivalent to wildcard for value, so that a pod can - tolerate all taints of a particular category. - type: string - tolerationSeconds: - description: |- - TolerationSeconds represents the period of time the toleration (which must be - of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, - it is not set, which means tolerate the taint forever (do not evict). Zero and - negative values will be treated as 0 (evict immediately) by the system. - format: int64 - type: integer - value: - description: |- - Value is the taint value the toleration matches to. - If the operator is Exists, the value should be empty, otherwise just a regular string. 
- type: string - type: object - type: array - x-kubernetes-list-type: atomic - topologySpreadConstraints: - description: |- - TopologySpreadConstraints describes how a group of pods ought to spread across topology - domains. Scheduler will schedule pods in a way which abides by the constraints. - All topologySpreadConstraints are ANDed. - items: - description: TopologySpreadConstraint specifies how - to spread matching pods among the given topology. - properties: - labelSelector: - description: |- - LabelSelector is used to find matching pods. - Pods that match this label selector are counted to determine the number of pods - in their corresponding topology domain. - properties: - matchExpressions: - description: matchExpressions is a list of - label selector requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". 
The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select the pods over which - spreading will be calculated. The keys are used to lookup values from the - incoming pod labels, those key-value labels are ANDed with labelSelector - to select the group of existing pods over which spreading will be calculated - for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. - items: - type: string - type: array - x-kubernetes-list-type: atomic - maxSkew: - description: |- - MaxSkew describes the degree to which pods may be unevenly distributed. - When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference - between the number of matching pods in the target topology and the global minimum. - The global minimum is the minimum number of matching pods in an eligible domain - or zero if the number of eligible domains is less than MinDomains. - format: int32 - type: integer - minDomains: - description: |- - MinDomains indicates a minimum number of eligible domains. - When the number of eligible domains with matching topology keys is less than minDomains, - Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. - And when the number of eligible domains with matching topology keys equals or greater than minDomains, - this value has no effect on scheduling. - format: int32 - type: integer - nodeAffinityPolicy: - description: |- - NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector - when calculating pod topology spread skew. Options are: - - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. - - If this value is nil, the behavior is equivalent to the Honor policy. 
- type: string - nodeTaintsPolicy: - description: |- - NodeTaintsPolicy indicates how we will treat node taints when calculating - pod topology spread skew. Options are: - - Honor: nodes without taints, along with tainted nodes for which the incoming pod - has a toleration, are included. - - Ignore: node taints are ignored. All nodes are included. - - If this value is nil, the behavior is equivalent to the Ignore policy. - type: string - topologyKey: - description: |- - TopologyKey is the key of node labels. Nodes that have a label with this key - and identical values are considered to be in the same topology. - We consider each as a "bucket", and try to put balanced number - of pods into each bucket. - We define a domain as a particular instance of a topology. - type: string - whenUnsatisfiable: - description: |- - WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy - the spread constraint. - - DoNotSchedule (default) tells the scheduler not to schedule it. - - ScheduleAnyway tells the scheduler to schedule the pod in any location, - but giving higher precedence to topologies that would help reduce the - skew. - type: string - required: - - maxSkew - - topologyKey - - whenUnsatisfiable - type: object - type: array - x-kubernetes-list-map-keys: - - topologyKey - - whenUnsatisfiable - x-kubernetes-list-type: map - volumes: - description: |- - List of volumes that can be mounted by containers belonging to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes - items: - description: Volume represents a named volume in a - pod that may be accessed by any container in the - pod. - properties: - awsElasticBlockStore: - description: |- - awsElasticBlockStore represents an AWS Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - format: int32 - type: integer - readOnly: - description: |- - readOnly value true will force the readOnly setting in VolumeMounts. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: boolean - volumeID: - description: |- - volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - required: - - volumeID - type: object - azureDisk: - description: azureDisk represents an Azure Data - Disk mount on the host and bind mount to the - pod. - properties: - cachingMode: - description: 'cachingMode is the Host Caching - mode: None, Read Only, Read Write.' - type: string - diskName: - description: diskName is the Name of the data - disk in the blob storage - type: string - diskURI: - description: diskURI is the URI of data disk - in the blob storage - type: string - fsType: - default: ext4 - description: |- - fsType is Filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
- type: string - kind: - description: 'kind expected values are Shared: - multiple blob disks per storage account Dedicated: - single blob disk per storage account Managed: - azure managed data disk (only in managed - availability set). defaults to shared' - type: string - readOnly: - default: false - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - required: - - diskName - - diskURI - type: object - azureFile: - description: azureFile represents an Azure File - Service mount on the host and bind mount to - the pod. - properties: - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretName: - description: secretName is the name of secret - that contains Azure Storage Account Name - and Key - type: string - shareName: - description: shareName is the azure share - Name - type: string - required: - - secretName - - shareName - type: object - cephfs: - description: cephFS represents a Ceph FS mount - on the host that shares a pod's lifetime - properties: - monitors: - description: |- - monitors is Required: Monitors is a collection of Ceph monitors - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - path: - description: 'path is Optional: Used as the - mounted root, rather than the full Ceph - tree, default is /' - type: string - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: boolean - secretFile: - description: |- - secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - secretRef: - description: |- - secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - description: |- - user is optional: User is the rados user name, default is admin - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - required: - - monitors - type: object - cinder: - description: |- - cinder represents a cinder volume attached and mounted on kubelets host machine. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: boolean - secretRef: - description: |- - secretRef is optional: points to a secret object containing parameters used to connect - to OpenStack. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeID: - description: |- - volumeID used to identify the volume in cinder. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - required: - - volumeID - type: object - configMap: - description: configMap represents a configMap - that should populate this volume - properties: - defaultMode: - description: |- - defaultMode is optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether the - ConfigMap or its keys must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - csi: - description: csi (Container Storage Interface) - represents ephemeral storage that is handled - by certain external CSI drivers (Beta feature). - properties: - driver: - description: |- - driver is the name of the CSI driver that handles this volume. - Consult with your admin for the correct name as registered in the cluster. - type: string - fsType: - description: |- - fsType to mount. Ex. "ext4", "xfs", "ntfs". - If not provided, the empty value is passed to the associated CSI driver - which will determine the default filesystem to apply. - type: string - nodePublishSecretRef: - description: |- - nodePublishSecretRef is a reference to the secret object containing - sensitive information to pass to the CSI driver to complete the CSI - NodePublishVolume and NodeUnpublishVolume calls. - This field is optional, and may be empty if no secret is required. 
If the - secret object contains more than one secret, all secret references are passed. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - readOnly: - description: |- - readOnly specifies a read-only configuration for the volume. - Defaults to false (read/write). - type: boolean - volumeAttributes: - additionalProperties: - type: string - description: |- - volumeAttributes stores driver-specific properties that are passed to the CSI - driver. Consult your driver's documentation for supported values. - type: object - required: - - driver - type: object - downwardAPI: - description: downwardAPI represents downward API - about the pod that should populate this volume - properties: - defaultMode: - description: |- - Optional: mode bits to use on created files by default. Must be a - Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: Items is a list of downward API - volume file - items: - description: DownwardAPIVolumeFile represents - information to create the file containing - the pod field - properties: - fieldRef: - description: 'Required: Selects a field - of the pod: only annotations, labels, - name, namespace and uid are supported.' - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". 
- type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path is the - relative path name of the file to - be created. Must not be absolute or - contain the ''..'' path. Must be utf-8 - encoded. The first item of the relative - path must not start with ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - emptyDir: - description: |- - emptyDir represents a temporary directory that shares a pod's lifetime. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - properties: - medium: - description: |- - medium represents what type of storage medium should back this directory. 
- The default is "" which means to use the node's default medium. - Must be an empty string (default) or Memory. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - type: string - sizeLimit: - anyOf: - - type: integer - - type: string - description: |- - sizeLimit is the total amount of local storage required for this EmptyDir volume. - The size limit is also applicable for memory medium. - The maximum usage on memory medium EmptyDir would be the minimum value between - the SizeLimit specified here and the sum of memory limits of all containers in a pod. - The default is nil which means that the limit is undefined. - More info: https://kubernetes. - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - ephemeral: - description: |- - ephemeral represents a volume that is handled by a cluster storage driver. - The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, - and deleted when the pod is removed. - properties: - volumeClaimTemplate: - description: |- - Will be used to create a stand-alone PVC to provision the volume. - The pod in which this EphemeralVolumeSource is embedded will be the - owner of the PVC, i.e. the PVC will be deleted together with the - pod. The name of the PVC will be `-` where - `` is the name from the `PodSpec.Volumes` array - entry. - properties: - metadata: - description: |- - May contain labels and annotations that will be copied into the PVC - when creating it. No other fields are allowed and will be rejected during - validation. 
- properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - The specification for the PersistentVolumeClaim. The entire content is - copied unchanged into the PVC that gets created from this - template. The same fields as in a PersistentVolumeClaim - are also valid here. - properties: - accessModes: - description: |- - accessModes contains the desired access modes the volume should have. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 - items: - type: string - type: array - x-kubernetes-list-type: atomic - dataSource: - description: |- - dataSource field can be used to specify either: - * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) - * An existing PVC (PersistentVolumeClaim) - If the provisioner or an external controller can support the specified data source, - it will create a new volume based on the contents of the specified data source. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - description: |- - dataSourceRef specifies the object from which to populate the volume with data, if a non-empty - volume is desired. This may be any object from a non-empty API group (non - core object) or a PersistentVolumeClaim object. 
- When this field is specified, volume binding will only succeed if the type of - the specified object matches some installed volume populator or dynamic - provisioner. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - namespace: - description: |- - Namespace is the namespace of resource being referenced - Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. - (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. - type: string - required: - - kind - - name - type: object - resources: - description: |- - resources represents the minimum resources the volume should have. - If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements - that are lower than previous value but must still be higher than capacity recorded in the - status field of the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. 
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - selector: - description: selector is a label query - over volumes to consider for binding. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - description: |- - storageClassName is the name of the StorageClass required by the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 - type: string - volumeAttributesClassName: - description: |- - volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. - If specified, the CSI driver will create or update the volume with the attributes defined - in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. - type: string - volumeMode: - description: |- - volumeMode defines what type of volume is required by the claim. - Value of Filesystem is implied when not included in claim spec. - type: string - volumeName: - description: volumeName is the binding - reference to the PersistentVolume - backing this claim. - type: string - type: object - required: - - spec - type: object - type: object - fc: - description: fc represents a Fibre Channel resource - that is attached to a kubelet's host machine - and then exposed to the pod. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - lun: - description: 'lun is Optional: FC target lun - number' - format: int32 - type: integer - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- type: boolean - targetWWNs: - description: 'targetWWNs is Optional: FC target - worldwide names (WWNs)' - items: - type: string - type: array - x-kubernetes-list-type: atomic - wwids: - description: |- - wwids Optional: FC volume world wide identifiers (wwids) - Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - flexVolume: - description: |- - flexVolume represents a generic volume resource that is - provisioned/attached using an exec based plugin. - properties: - driver: - description: driver is the name of the driver - to use for this volume. - type: string - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. - type: string - options: - additionalProperties: - type: string - description: 'options is Optional: this field - holds extra command options if any.' - type: object - readOnly: - description: |- - readOnly is Optional: defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef is Optional: secretRef is reference to the secret object containing - sensitive information to pass to the plugin scripts. This may be - empty if no secret object is specified. If the secret object - contains more than one secret, all secrets are passed to the plugin - scripts. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - required: - - driver - type: object - flocker: - description: flocker represents a Flocker volume - attached to a kubelet's host machine. This depends - on the Flocker control service being running - properties: - datasetName: - description: |- - datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker - should be considered as deprecated - type: string - datasetUUID: - description: datasetUUID is the UUID of the - dataset. This is unique identifier of a - Flocker dataset - type: string - type: object - gcePersistentDisk: - description: |- - gcePersistentDisk represents a GCE Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - properties: - fsType: - description: |- - fsType is filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - format: int32 - type: integer - pdName: - description: |- - pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: boolean - required: - - pdName - type: object - gitRepo: - description: |- - gitRepo represents a git repository at a particular revision. - DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an - EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir - into the Pod's container. - properties: - directory: - description: |- - directory is the target directory name. - Must not contain or start with '..'. If '.' is supplied, the volume directory will be the - git repository. Otherwise, if specified, the volume will contain the git repository in - the subdirectory with the given name. - type: string - repository: - description: repository is the URL - type: string - revision: - description: revision is the commit hash for - the specified revision. - type: string - required: - - repository - type: object - glusterfs: - description: |- - glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/glusterfs/README.md - properties: - endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - path: - description: |- - path is the Glusterfs volume path. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - readOnly: - description: |- - readOnly here will force the Glusterfs volume to be mounted with read-only permissions. - Defaults to false. 
- More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: boolean - required: - - endpoints - - path - type: object - hostPath: - description: |- - hostPath represents a pre-existing file or directory on the host - machine that is directly exposed to the container. This is generally - used for system agents or other privileged things that are allowed - to see the host machine. Most containers will NOT need this. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - properties: - path: - description: |- - path of the directory on the host. - If the path is a symlink, it will follow the link to the real path. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - type: - description: |- - type for HostPath Volume - Defaults to "" - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - required: - - path - type: object - image: - description: |- - image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. - The volume is resolved at pod startup depending on which PullPolicy value is provided: - - - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - - Never: the kubelet never pulls the reference and only uses a local image or artifact. - properties: - pullPolicy: - description: |- - Policy for pulling OCI objects. Possible values are: - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. - type: string - reference: - description: |- - Required: Image or artifact reference to be used. - Behaves in the same way as pod.spec.containers[*].image. 
- Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets. - More info: https://kubernetes. - type: string - type: object - iscsi: - description: |- - iscsi represents an ISCSI Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md - properties: - chapAuthDiscovery: - description: chapAuthDiscovery defines whether - support iSCSI Discovery CHAP authentication - type: boolean - chapAuthSession: - description: chapAuthSession defines whether - support iSCSI Session CHAP authentication - type: boolean - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi - type: string - initiatorName: - description: |- - initiatorName is the custom iSCSI Initiator Name. - If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface - : will be created for the connection. - type: string - iqn: - description: iqn is the target iSCSI Qualified - Name. - type: string - iscsiInterface: - default: default - description: |- - iscsiInterface is the interface Name that uses an iSCSI transport. - Defaults to 'default' (tcp). - type: string - lun: - description: lun represents iSCSI Target Lun - number. - format: int32 - type: integer - portals: - description: |- - portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - items: - type: string - type: array - x-kubernetes-list-type: atomic - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. 
- Defaults to false. - type: boolean - secretRef: - description: secretRef is the CHAP Secret - for iSCSI target and initiator authentication - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - targetPortal: - description: |- - targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - type: string - required: - - iqn - - lun - - targetPortal - type: object - name: - description: |- - name of the volume. - Must be a DNS_LABEL and unique within the pod. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - nfs: - description: |- - nfs represents an NFS mount on the host that shares a pod's lifetime - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - properties: - path: - description: |- - path that is exported by the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - readOnly: - description: |- - readOnly here will force the NFS export to be mounted with read-only permissions. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: boolean - server: - description: |- - server is the hostname or IP address of the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - required: - - path - - server - type: object - persistentVolumeClaim: - description: |- - persistentVolumeClaimVolumeSource represents a reference to a - PersistentVolumeClaim in the same namespace. 
- More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - properties: - claimName: - description: |- - claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - type: string - readOnly: - description: |- - readOnly Will force the ReadOnly setting in VolumeMounts. - Default false. - type: boolean - required: - - claimName - type: object - photonPersistentDisk: - description: photonPersistentDisk represents a - PhotonController persistent disk attached and - mounted on kubelets host machine - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - pdID: - description: pdID is the ID that identifies - Photon Controller persistent disk - type: string - required: - - pdID - type: object - portworxVolume: - description: portworxVolume represents a portworx - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fSType represents the filesystem type to mount - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - volumeID: - description: volumeID uniquely identifies - a Portworx volume - type: string - required: - - volumeID - type: object - projected: - description: projected items for all in one resources - secrets, configmaps, and downward API - properties: - defaultMode: - description: |- - defaultMode are the mode bits used to set permissions on created files by default. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Directories within the path are not affected by this setting. - format: int32 - type: integer - sources: - description: |- - sources is the list of volume projections. Each entry in this list - handles one source. - items: - description: |- - Projection that may be projected along with other supported volume types. - Exactly one of these fields must be set. - properties: - clusterTrustBundle: - description: |- - ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field - of ClusterTrustBundle objects in an auto-updating file. - - Alpha, gated by the ClusterTrustBundleProjection feature gate. - - ClusterTrustBundle objects can either be selected by name, or by the - combination of signer name and a label selector. - properties: - labelSelector: - description: |- - Select all ClusterTrustBundles that match this label selector. Only has - effect if signerName is set. Mutually-exclusive with name. If unset, - interpreted as "match nothing". If set but empty, interpreted as "match - everything". - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. 
This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - name: - description: |- - Select a single ClusterTrustBundle by object name. Mutually-exclusive - with signerName and labelSelector. - type: string - optional: - description: |- - If true, don't block pod startup if the referenced ClusterTrustBundle(s) - aren't available. If using name, then the named ClusterTrustBundle is - allowed not to exist. If using signerName, then the combination of - signerName and labelSelector is allowed to match zero - ClusterTrustBundles. - type: boolean - path: - description: Relative path from - the volume root to write the bundle. - type: string - signerName: - description: |- - Select all ClusterTrustBundles that match this signer name. - Mutually-exclusive with name. The contents of all selected - ClusterTrustBundles will be unified and deduplicated. - type: string - required: - - path - type: object - configMap: - description: configMap information about - the configMap data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key - to a path within a volume. 
- properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether - the ConfigMap or its keys must - be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - downwardAPI: - description: downwardAPI information - about the downwardAPI data to project - properties: - items: - description: Items is a list of - DownwardAPIVolume file - items: - description: DownwardAPIVolumeFile - represents information to create - the file containing the pod - field - properties: - fieldRef: - description: 'Required: Selects - a field of the pod: only - annotations, labels, name, - namespace and uid are supported.' - properties: - apiVersion: - description: Version of - the schema the FieldPath - is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the - field to select in the - specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path - is the relative path name - of the file to be created. - Must not be absolute or - contain the ''..'' path. - Must be utf-8 encoded. The - first item of the relative - path must not start with - ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container - name: required for volumes, - optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies - the output format of - the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: - resource to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - secret: - description: secret information about - the secret data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. 
- items: - description: Maps a string key - to a path within a volume. - properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional field specify - whether the Secret or its key - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - serviceAccountToken: - description: serviceAccountToken is - information about the serviceAccountToken - data to project - properties: - audience: - description: |- - audience is the intended audience of the token. A recipient of a token - must identify itself with an identifier specified in the audience of the - token, and otherwise should reject the token. The audience defaults to the - identifier of the apiserver. - type: string - expirationSeconds: - description: |- - expirationSeconds is the requested duration of validity of the service - account token. 
As the token approaches expiration, the kubelet volume - plugin will proactively rotate the service account token. The kubelet will - start trying to rotate the token if the token is older than 80 percent of - its time to live or if the token is older than 24 hours.Defaults to 1 hour - and must be at least 10 minutes. - format: int64 - type: integer - path: - description: |- - path is the path relative to the mount point of the file to project the - token into. - type: string - required: - - path - type: object - type: object - type: array - x-kubernetes-list-type: atomic - type: object - quobyte: - description: quobyte represents a Quobyte mount - on the host that shares a pod's lifetime - properties: - group: - description: |- - group to map volume access to - Default is no group - type: string - readOnly: - description: |- - readOnly here will force the Quobyte volume to be mounted with read-only permissions. - Defaults to false. - type: boolean - registry: - description: |- - registry represents a single or multiple Quobyte Registry services - specified as a string as host:port pair (multiple entries are separated with commas) - which acts as the central registry for volumes - type: string - tenant: - description: |- - tenant owning the given Quobyte volume in the Backend - Used with dynamically provisioned Quobyte volumes, value is set by the plugin - type: string - user: - description: |- - user to map volume access to - Defaults to serivceaccount user - type: string - volume: - description: volume is a string that references - an already created Quobyte volume by name. - type: string - required: - - registry - - volume - type: object - rbd: - description: |- - rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/rbd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. 
- Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd - type: string - image: - description: |- - image is the rados image name. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - keyring: - default: /etc/ceph/keyring - description: |- - keyring is the path to key ring for RBDUser. - Default is /etc/ceph/keyring. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - monitors: - description: |- - monitors is a collection of Ceph monitors. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - pool: - default: rbd - description: |- - pool is the rados pool name. - Default is rbd. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: boolean - secretRef: - description: |- - secretRef is name of the authentication secret for RBDUser. If provided - overrides keyring. - Default is nil. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - default: admin - description: |- - user is the rados user name. - Default is admin. 
- More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - required: - - image - - monitors - type: object - scaleIO: - description: scaleIO represents a ScaleIO persistent - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - default: xfs - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". - Default is "xfs". - type: string - gateway: - description: gateway is the host address of - the ScaleIO API Gateway. - type: string - protectionDomain: - description: protectionDomain is the name - of the ScaleIO Protection Domain for the - configured storage. - type: string - readOnly: - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef references to the secret for ScaleIO user and other - sensitive information. If this is not provided, Login operation will fail. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - sslEnabled: - description: sslEnabled Flag enable/disable - SSL communication with Gateway, default - false - type: boolean - storageMode: - default: ThinProvisioned - description: |- - storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. - Default is ThinProvisioned. - type: string - storagePool: - description: storagePool is the ScaleIO Storage - Pool associated with the protection domain. 
- type: string - system: - description: system is the name of the storage - system as configured in ScaleIO. - type: string - volumeName: - description: |- - volumeName is the name of a volume already created in the ScaleIO system - that is associated with this volume source. - type: string - required: - - gateway - - secretRef - - system - type: object - secret: - description: |- - secret represents a secret that should populate this volume. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - properties: - defaultMode: - description: |- - defaultMode is Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values - for mode bits. Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items If unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. 
- type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - optional: - description: optional field specify whether - the Secret or its keys must be defined - type: boolean - secretName: - description: |- - secretName is the name of the secret in the pod's namespace to use. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - type: string - type: object - storageos: - description: storageOS represents a StorageOS - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef specifies the secret to use for obtaining the StorageOS API - credentials. If not specified, default values will be attempted. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeName: - description: |- - volumeName is the human-readable name of the StorageOS volume. Volume - names are only unique within a namespace. - type: string - volumeNamespace: - description: |- - volumeNamespace specifies the scope of the volume within StorageOS. If no - namespace is specified then the Pod's namespace will be used. This allows the - Kubernetes name scoping to be mirrored within StorageOS for tighter integration. 
- Set VolumeName to any name to override the default behaviour. - Set to "default" if you are not using namespaces within StorageOS. - type: string - type: object - vsphereVolume: - description: vsphereVolume represents a vSphere - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fsType is filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - storagePolicyID: - description: storagePolicyID is the storage - Policy Based Management (SPBM) profile ID - associated with the StoragePolicyName. - type: string - storagePolicyName: - description: storagePolicyName is the storage - Policy Based Management (SPBM) profile name. - type: string - volumePath: - description: volumePath is the path that identifies - vSphere volume vmdk - type: string - required: - - volumePath - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - required: - - containers - type: object - type: object - type: object - description: |- - A map of JAXReplicaType (type) to ReplicaSpec (value). Specifies the JAX cluster configuration. - For example, - { - "Worker": JAXReplicaSpec, - } - type: object - runPolicy: - description: |- - RunPolicy encapsulates various runtime policies of the distributed training - job, for example how to clean up resources and how long the job can stay - active. - properties: - activeDeadlineSeconds: - description: |- - Specifies the duration in seconds relative to the startTime that the job may be active - before the system tries to terminate it; value must be positive integer. - format: int64 - type: integer - backoffLimit: - description: Optional number of retries before marking this job - failed. 
- format: int32 - type: integer - cleanPodPolicy: - description: |- - CleanPodPolicy defines the policy to kill pods after the job completes. - Default to None. - type: string - managedBy: - description: |- - ManagedBy is used to indicate the controller or entity that manages a job. - The value must be either an empty, 'kubeflow.org/training-operator' or - 'kueue.x-k8s.io/multikueue'. - The training-operator reconciles a job which doesn't have this - field at all or the field value is the reserved string - 'kubeflow.org/training-operator', but delegates reconciling the job - with 'kueue.x-k8s. - type: string - schedulingPolicy: - description: SchedulingPolicy defines the policy related to scheduling, - e.g. gang-scheduling - properties: - minAvailable: - format: int32 - type: integer - minResources: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - priorityClass: - type: string - queue: - type: string - x-kubernetes-validations: - - message: spec.runPolicy.schedulingPolicy.queue is immutable - rule: self == oldSelf - scheduleTimeoutSeconds: - format: int32 - type: integer - type: object - suspend: - default: false - description: |- - suspend specifies whether the Job controller should create Pods or not. - If a Job is created with suspend set to true, no Pods are created by - the Job controller. If a Job is suspended after creation (i.e. the - flag goes from false to true), the Job controller will delete all - active Pods and PodGroups associated with this Job. - Users must design their workload to gracefully handle this. - type: boolean - ttlSecondsAfterFinished: - description: |- - TTLSecondsAfterFinished is the TTL to clean up jobs. - It may take extra ReconcilePeriod seconds for the cleanup, since - reconcile gets called periodically. - Default to infinite. 
- format: int32 - type: integer - type: object - required: - - jaxReplicaSpecs - type: object - status: - description: |- - Most recently observed status of the JAXJob. - Read-only (modified by the system). - properties: - completionTime: - description: |- - Represents time when the job was completed. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - conditions: - description: Conditions is an array of current observed job conditions. - items: - description: JobCondition describes the state of the job at a certain - point. - properties: - lastTransitionTime: - description: Last time the condition transitioned from one status - to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human readable message indicating details about - the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Type of job condition. - type: string - required: - - status - - type - type: object - type: array - lastReconcileTime: - description: |- - Represents last time when the job was reconciled. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - replicaStatuses: - additionalProperties: - description: ReplicaStatus represents the current observed state - of the replica. - properties: - active: - description: The number of actively running pods. - format: int32 - type: integer - failed: - description: The number of pods which reached phase Failed. 
- format: int32 - type: integer - labelSelector: - description: 'Deprecated: Use Selector instead' - properties: - matchExpressions: - description: matchExpressions is a list of label selector - requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - selector: - description: |- - A Selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty Selector matches all objects. A null - Selector matches no objects. - type: string - succeeded: - description: The number of pods which reached phase Succeeded. - format: int32 - type: integer - type: object - description: |- - ReplicaStatuses is map of ReplicaType and ReplicaStatus, - specifies the status of each replica. 
- type: object - startTime: - description: |- - Represents time when the job was acknowledged by the job controller. - It is not guaranteed to be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - type: object - type: object - served: true - storage: true - subresources: - scale: - labelSelectorPath: .status.replicaStatuses.Worker.selector - specReplicasPath: .spec.jaxReplicaSpecs.Worker.replicas - statusReplicasPath: .status.replicaStatuses.Worker.active - status: {} diff --git a/manifests/base/crds/kubeflow.org_mpijobs.yaml b/manifests/base/crds/kubeflow.org_mpijobs.yaml deleted file mode 100644 index 017a97b71f..0000000000 --- a/manifests/base/crds/kubeflow.org_mpijobs.yaml +++ /dev/null @@ -1,7907 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.5 - name: mpijobs.kubeflow.org -spec: - group: kubeflow.org - names: - kind: MPIJob - listKind: MPIJobList - plural: mpijobs - singular: mpijob - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - - jsonPath: .status.conditions[-1:].type - name: State - type: string - name: v1 - schema: - openAPIV3Schema: - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - properties: - cleanPodPolicy: - description: |- - CleanPodPolicy defines the policy that whether to kill pods after the job completes. - Defaults to None. - type: string - mainContainer: - description: |- - MainContainer specifies name of the main container which - executes the MPI code. - type: string - mpiReplicaSpecs: - additionalProperties: - description: ReplicaSpec is a description of the replica - properties: - replicas: - description: |- - Replicas is the desired number of replicas of the given template. - If unspecified, defaults to 1. - format: int32 - type: integer - restartPolicy: - description: |- - Restart policy for all replicas within the job. - One of Always, OnFailure, Never and ExitCode. - Default to Never. - type: string - template: - description: |- - Template is the object that describes the pod that - will be created for this replica. RestartPolicy in PodTemplateSpec - will be overide by RestartPolicy in ReplicaSpec - properties: - metadata: - description: |- - Standard object's metadata. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - Specification of the desired behavior of the pod. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status - properties: - activeDeadlineSeconds: - description: |- - Optional duration in seconds the pod may be active on the node relative to - StartTime before the system will actively try to mark it failed and kill associated containers. 
- Value must be a positive integer. - format: int64 - type: integer - affinity: - description: If specified, the pod's scheduling constraints - properties: - nodeAffinity: - description: Describes node affinity scheduling - rules for the pod. - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: |- - An empty preferred scheduling term matches all objects with implicit weight 0 - (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). - properties: - preference: - description: A node selector term, associated - with the corresponding weight. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - weight: - description: Weight associated with matching - the corresponding nodeSelectorTerm, - in the range 1-100. - format: int32 - type: integer - required: - - preference - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to an update), the system - may or may not try to eventually evict the pod from its node. - properties: - nodeSelectorTerms: - description: Required. 
A list of node selector - terms. The terms are ORed. - items: - description: |- - A null or empty node selector term matches no objects. The requirements of - them are ANDed. - The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. 
If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-type: atomic - required: - - nodeSelectorTerms - type: object - x-kubernetes-map-type: atomic - type: object - podAffinity: - description: Describes pod affinity scheduling rules - (e.g. co-locate this pod in the same node, zone, - etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. 
- properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. 
The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. 
- items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. 
- type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. 
- type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - podAntiAffinity: - description: Describes pod anti-affinity scheduling - rules (e.g. avoid putting this pod in the same - node, zone, etc. as some other pod(s)). 
- properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the anti-affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. 
- type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. 
- format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the anti-affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the anti-affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. 
- The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - type: object - automountServiceAccountToken: - description: AutomountServiceAccountToken indicates - whether a service account token should be automatically - mounted. - type: boolean - containers: - description: |- - List of containers belonging to the pod. - Containers cannot currently be added or removed. - There must be at least one container in a Pod. - Cannot be updated. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". 
- type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. 
- The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. 
- More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. 
HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. 
- x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. 
To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. 
- x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. - For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. 
- Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. 
- items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. 
- x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. 
- properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. 
- type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - dnsConfig: - description: |- - Specifies the DNS parameters of a pod. - Parameters specified here will be merged to the generated DNS - configuration based on DNSPolicy. - properties: - nameservers: - description: |- - A list of DNS name server IP addresses. - This will be appended to the base nameservers generated from DNSPolicy. - Duplicated nameservers will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - options: - description: |- - A list of DNS resolver options. - This will be merged with the base options generated from DNSPolicy. - Duplicated entries will be removed. Resolution options given in Options - will override those that appear in the base DNSPolicy. 
- items: - description: PodDNSConfigOption defines DNS resolver - options of a pod. - properties: - name: - description: Required. - type: string - value: - type: string - type: object - type: array - x-kubernetes-list-type: atomic - searches: - description: |- - A list of DNS search domains for host-name lookup. - This will be appended to the base search paths generated from DNSPolicy. - Duplicated search paths will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - dnsPolicy: - description: |- - Set DNS policy for the pod. - Defaults to "ClusterFirst". - Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. - DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. - To have DNS options set along with hostNetwork, you have to specify DNS policy - explicitly to 'ClusterFirstWithHostNet'. - type: string - enableServiceLinks: - description: |- - EnableServiceLinks indicates whether information about services should be injected into pod's - environment variables, matching the syntax of Docker links. - Optional: Defaults to true. - type: boolean - ephemeralContainers: - description: |- - List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing - pod to perform user-initiated actions such as debugging. This list cannot be specified when - creating a pod, and it cannot be modified by updating the pod spec. In order to add an - ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. - items: - description: |- - An EphemeralContainer is a temporary container that you may add to an existing Pod for - user-initiated activities such as debugging. Ephemeral containers have no resource or - scheduling guarantees, and they will not be restarted when they exit or when a Pod is - removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the - Pod to exceed its resource allocation. 
- properties: - args: - description: |- - Arguments to the entrypoint. - The image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. 
- type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: Lifecycle is not allowed for ephemeral - containers. 
- properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. 
- Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. 
- properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. 
- properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the ephemeral container specified as a DNS_LABEL. - This name must be unique among all containers, init containers and ephemeral containers. - type: string - ports: - description: Ports are not allowed for ephemeral - containers. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. 
Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. 
You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources - already allocated to the pod. - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. 
- - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - Restart policy for the container to manage the restart behavior of each - container within a pod. 
- This may only be set for init containers. You cannot set this field on - ephemeral containers. - type: string - securityContext: - description: |- - Optional: SecurityContext defines the security options the ephemeral container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - targetContainerName: - description: |- - If set, the name of the container from PodSpec that this ephemeral container targets. - The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. - If not set then the ephemeral container uses the namespaces configured in the Pod spec. - - The container runtime must implement support for this feature. - type: string - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. 
- type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). 
- type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. - - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - hostAliases: - description: |- - HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts - file if specified. - items: - description: |- - HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the - pod's hosts file. 
- properties: - hostnames: - description: Hostnames for the above IP address. - items: - type: string - type: array - x-kubernetes-list-type: atomic - ip: - description: IP address of the host file entry. - type: string - required: - - ip - type: object - type: array - x-kubernetes-list-map-keys: - - ip - x-kubernetes-list-type: map - hostIPC: - description: |- - Use the host's ipc namespace. - Optional: Default to false. - type: boolean - hostNetwork: - description: |- - Host networking requested for this pod. Use the host's network namespace. - If this option is set, the ports that will be used must be specified. - Default to false. - type: boolean - hostPID: - description: |- - Use the host's pid namespace. - Optional: Default to false. - type: boolean - hostUsers: - description: |- - Use the host's user namespace. - Optional: Default to true. - If set to true or not present, the pod will be run in the host user namespace, useful - for when the pod needs a feature only available to the host user namespace, such as - loading a kernel module with CAP_SYS_MODULE. - When set to false, a new userns is created for the pod. - type: boolean - hostname: - description: |- - Specifies the hostname of the Pod - If not specified, the pod's hostname will be set to a system-defined value. - type: string - imagePullSecrets: - description: |- - ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. - If specified, these secrets will be passed to individual puller implementations for them to use. - More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod - items: - description: |- - LocalObjectReference contains enough information to let you locate the - referenced object inside the same namespace. - properties: - name: - default: "" - description: |- - Name of the referent. 
- This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - initContainers: - description: |- - List of initialization containers belonging to the pod. - Init containers are executed in order prior to containers being started. If any - init container fails, the pod is considered to have failed and is handled according - to its restartPolicy. The name for an init container or normal container must be - unique among all containers. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. 
- items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. 
All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. 
- type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. 
The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. 
- type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. 
Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. 
Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. - For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. 
- type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - nodeName: - description: |- - NodeName indicates in which node this pod is scheduled. - If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName. - Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod. - This field should not be used to express a desire for the pod to be scheduled on a specific node. - https://kubernetes. - type: string - nodeSelector: - additionalProperties: - type: string - description: |- - NodeSelector is a selector which must be true for the pod to fit on a node. - Selector which must match a node's labels for the pod to be scheduled on that node. 
- More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ - type: object - x-kubernetes-map-type: atomic - os: - description: |- - Specifies the OS of the containers in the pod. - Some pod and container fields are restricted if this is set. - - If the OS field is set to linux, the following fields must be unset: - -securityContext.windowsOptions - - If the OS field is set to windows, following fields must be unset: - - spec.hostPID - - spec.hostIPC - - spec.hostUsers - - spec.securityContext.appArmorProfile - - spec.securityContext. - properties: - name: - description: |- - Name is the name of the operating system. The currently supported values are linux and windows. - Additional value may be defined in future and can be one of: - https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration - Clients should expect to handle additional values and treat unrecognized values in this field as os: null - type: string - required: - - name - type: object - overhead: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. - This field will be autopopulated at admission time by the RuntimeClass admission controller. If - the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. - The RuntimeClass admission controller will reject Pod create requests which have the overhead already - set. - type: object - preemptionPolicy: - description: |- - PreemptionPolicy is the Policy for preempting pods with lower priority. - One of Never, PreemptLowerPriority. - Defaults to PreemptLowerPriority if unset. - type: string - priority: - description: |- - The priority value. 
Various system components use this field to find the - priority of the pod. When Priority Admission Controller is enabled, it - prevents users from setting this field. The admission controller populates - this field from PriorityClassName. - The higher the value, the higher the priority. - format: int32 - type: integer - priorityClassName: - description: |- - If specified, indicates the pod's priority. "system-node-critical" and - "system-cluster-critical" are two special keywords which indicate the - highest priorities with the former being the highest priority. Any other - name must be defined by creating a PriorityClass object with that name. - If not specified, the pod priority will be default or zero if there is no - default. - type: string - readinessGates: - description: |- - If specified, all readiness gates will be evaluated for pod readiness. - A pod is ready when all its containers are ready AND - all conditions specified in the readiness gates have status equal to "True" - More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates - items: - description: PodReadinessGate contains the reference - to a pod condition - properties: - conditionType: - description: ConditionType refers to a condition - in the pod's condition list with matching type. - type: string - required: - - conditionType - type: object - type: array - x-kubernetes-list-type: atomic - resourceClaims: - description: |- - ResourceClaims defines which ResourceClaims must be allocated - and reserved before the Pod is allowed to start. The resources - will be made available to those containers which consume them - by name. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. - items: - description: |- - PodResourceClaim references exactly one ResourceClaim, either directly - or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim - for the pod. 
- - It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. - Containers that need access to the ResourceClaim reference it with this name. - properties: - name: - description: |- - Name uniquely identifies this resource claim inside the pod. - This must be a DNS_LABEL. - type: string - resourceClaimName: - description: |- - ResourceClaimName is the name of a ResourceClaim object in the same - namespace as this pod. - - Exactly one of ResourceClaimName and ResourceClaimTemplateName must - be set. - type: string - resourceClaimTemplateName: - description: |- - ResourceClaimTemplateName is the name of a ResourceClaimTemplate - object in the same namespace as this pod. - - The template will be used to create a new ResourceClaim, which will - be bound to this pod. When this pod is deleted, the ResourceClaim - will also be deleted. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - restartPolicy: - description: |- - Restart policy for all containers within the pod. - One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. - Default to Always. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy - type: string - runtimeClassName: - description: |- - RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used - to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. - If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an - empty definition that uses the default runtime handler. - More info: https://git.k8s. - type: string - schedulerName: - description: |- - If specified, the pod will be dispatched by specified scheduler. - If not specified, the pod will be dispatched by default scheduler. 
- type: string - schedulingGates: - description: |- - SchedulingGates is an opaque list of values that if specified will block scheduling the pod. - If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the - scheduler will not attempt to schedule the pod. - - SchedulingGates can only be set at pod creation time, and be removed only afterwards. - items: - description: PodSchedulingGate is associated to a - Pod to guard its scheduling. - properties: - name: - description: |- - Name of the scheduling gate. - Each scheduling gate must have a unique name field. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - securityContext: - description: |- - SecurityContext holds pod-level security attributes and common container settings. - Optional: Defaults to empty. See type description for default values of each field. - properties: - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - fsGroup: - description: |- - A special supplemental group that applies to all containers in a pod. - Some volume types allow the Kubelet to change the ownership of that volume - to be owned by the pod: - - 1. The owning GID will be the FSGroup - 2. 
The setgid bit is set (new files created in the volume will be owned by FSGroup) - 3. - format: int64 - type: integer - fsGroupChangePolicy: - description: |- - fsGroupChangePolicy defines behavior of changing ownership and permission of the volume - before being exposed inside Pod. This field will only apply to - volume types which support fsGroup based ownership(and permissions). - It will have no effect on ephemeral volume types such as: secret, configmaps - and emptydir. - Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. - type: string - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in SecurityContext. - type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to all containers. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in SecurityContext. 
If set in - both SecurityContext and PodSecurityContext, the value specified in SecurityContext - takes precedence for that container. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that - applies to the container. - type: string - role: - description: Role is a SELinux role label that - applies to the container. - type: string - type: - description: Type is a SELinux type label that - applies to the container. - type: string - user: - description: User is a SELinux user label that - applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. - Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - supplementalGroups: - description: |- - A list of groups applied to the first process run in each container, in - addition to the container's primary GID and fsGroup (if specified). If - the SupplementalGroupsPolicy feature is enabled, the - supplementalGroupsPolicy field determines whether these are in addition - to or instead of any group memberships defined in the container image. 
- items: - format: int64 - type: integer - type: array - x-kubernetes-list-type: atomic - supplementalGroupsPolicy: - description: |- - Defines how supplemental groups of the first container processes are calculated. - Valid values are "Merge" and "Strict". If not specified, "Merge" is used. - (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled - and the container runtime must implement support for this feature. - Note that this field cannot be set when spec.os.name is windows. - type: string - sysctls: - description: |- - Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported - sysctls (by the container runtime) might fail to launch. - Note that this field cannot be set when spec.os.name is windows. - items: - description: Sysctl defines a kernel parameter - to be set - properties: - name: - description: Name of a property to set - type: string - value: - description: Value of a property to set - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options within a container's SecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. 
- All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - serviceAccount: - description: |- - DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. - Deprecated: Use serviceAccountName instead. - type: string - serviceAccountName: - description: |- - ServiceAccountName is the name of the ServiceAccount to use to run this pod. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ - type: string - setHostnameAsFQDN: - description: |- - If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). - In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). - type: boolean - shareProcessNamespace: - description: |- - Share a single process namespace between all of the containers in a pod. - When this is set containers will be able to view and signal processes from other containers - in the same pod, and the first process in each container will not be assigned PID 1. - HostPID and ShareProcessNamespace cannot both be set. - Optional: Default to false. - type: boolean - subdomain: - description: |- - If specified, the fully qualified Pod hostname will be "...svc.". - If not specified, the pod will not have a domainname at all. 
- type: string - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - If this value is nil, the default grace period will be used instead. - format: int64 - type: integer - tolerations: - description: If specified, the pod's tolerations. - items: - description: |- - The pod this Toleration is attached to tolerates any taint that matches - the triple using the matching operator . - properties: - effect: - description: |- - Effect indicates the taint effect to match. Empty means match all taint effects. - When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: |- - Key is the taint key that the toleration applies to. Empty means match all taint keys. - If the key is empty, operator must be Exists; this combination means to match all values and all keys. - type: string - operator: - description: |- - Operator represents a key's relationship to the value. - Valid operators are Exists and Equal. Defaults to Equal. - Exists is equivalent to wildcard for value, so that a pod can - tolerate all taints of a particular category. - type: string - tolerationSeconds: - description: |- - TolerationSeconds represents the period of time the toleration (which must be - of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, - it is not set, which means tolerate the taint forever (do not evict). Zero and - negative values will be treated as 0 (evict immediately) by the system. - format: int64 - type: integer - value: - description: |- - Value is the taint value the toleration matches to. - If the operator is Exists, the value should be empty, otherwise just a regular string. 
- type: string - type: object - type: array - x-kubernetes-list-type: atomic - topologySpreadConstraints: - description: |- - TopologySpreadConstraints describes how a group of pods ought to spread across topology - domains. Scheduler will schedule pods in a way which abides by the constraints. - All topologySpreadConstraints are ANDed. - items: - description: TopologySpreadConstraint specifies how - to spread matching pods among the given topology. - properties: - labelSelector: - description: |- - LabelSelector is used to find matching pods. - Pods that match this label selector are counted to determine the number of pods - in their corresponding topology domain. - properties: - matchExpressions: - description: matchExpressions is a list of - label selector requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". 
The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select the pods over which - spreading will be calculated. The keys are used to lookup values from the - incoming pod labels, those key-value labels are ANDed with labelSelector - to select the group of existing pods over which spreading will be calculated - for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. - items: - type: string - type: array - x-kubernetes-list-type: atomic - maxSkew: - description: |- - MaxSkew describes the degree to which pods may be unevenly distributed. - When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference - between the number of matching pods in the target topology and the global minimum. - The global minimum is the minimum number of matching pods in an eligible domain - or zero if the number of eligible domains is less than MinDomains. - format: int32 - type: integer - minDomains: - description: |- - MinDomains indicates a minimum number of eligible domains. - When the number of eligible domains with matching topology keys is less than minDomains, - Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. - And when the number of eligible domains with matching topology keys equals or greater than minDomains, - this value has no effect on scheduling. - format: int32 - type: integer - nodeAffinityPolicy: - description: |- - NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector - when calculating pod topology spread skew. Options are: - - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. - - If this value is nil, the behavior is equivalent to the Honor policy. 
- type: string - nodeTaintsPolicy: - description: |- - NodeTaintsPolicy indicates how we will treat node taints when calculating - pod topology spread skew. Options are: - - Honor: nodes without taints, along with tainted nodes for which the incoming pod - has a toleration, are included. - - Ignore: node taints are ignored. All nodes are included. - - If this value is nil, the behavior is equivalent to the Ignore policy. - type: string - topologyKey: - description: |- - TopologyKey is the key of node labels. Nodes that have a label with this key - and identical values are considered to be in the same topology. - We consider each as a "bucket", and try to put balanced number - of pods into each bucket. - We define a domain as a particular instance of a topology. - type: string - whenUnsatisfiable: - description: |- - WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy - the spread constraint. - - DoNotSchedule (default) tells the scheduler not to schedule it. - - ScheduleAnyway tells the scheduler to schedule the pod in any location, - but giving higher precedence to topologies that would help reduce the - skew. - type: string - required: - - maxSkew - - topologyKey - - whenUnsatisfiable - type: object - type: array - x-kubernetes-list-map-keys: - - topologyKey - - whenUnsatisfiable - x-kubernetes-list-type: map - volumes: - description: |- - List of volumes that can be mounted by containers belonging to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes - items: - description: Volume represents a named volume in a - pod that may be accessed by any container in the - pod. - properties: - awsElasticBlockStore: - description: |- - awsElasticBlockStore represents an AWS Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - format: int32 - type: integer - readOnly: - description: |- - readOnly value true will force the readOnly setting in VolumeMounts. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: boolean - volumeID: - description: |- - volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - required: - - volumeID - type: object - azureDisk: - description: azureDisk represents an Azure Data - Disk mount on the host and bind mount to the - pod. - properties: - cachingMode: - description: 'cachingMode is the Host Caching - mode: None, Read Only, Read Write.' - type: string - diskName: - description: diskName is the Name of the data - disk in the blob storage - type: string - diskURI: - description: diskURI is the URI of data disk - in the blob storage - type: string - fsType: - default: ext4 - description: |- - fsType is Filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
- type: string - kind: - description: 'kind expected values are Shared: - multiple blob disks per storage account Dedicated: - single blob disk per storage account Managed: - azure managed data disk (only in managed - availability set). defaults to shared' - type: string - readOnly: - default: false - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - required: - - diskName - - diskURI - type: object - azureFile: - description: azureFile represents an Azure File - Service mount on the host and bind mount to - the pod. - properties: - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretName: - description: secretName is the name of secret - that contains Azure Storage Account Name - and Key - type: string - shareName: - description: shareName is the azure share - Name - type: string - required: - - secretName - - shareName - type: object - cephfs: - description: cephFS represents a Ceph FS mount - on the host that shares a pod's lifetime - properties: - monitors: - description: |- - monitors is Required: Monitors is a collection of Ceph monitors - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - path: - description: 'path is Optional: Used as the - mounted root, rather than the full Ceph - tree, default is /' - type: string - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: boolean - secretFile: - description: |- - secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - secretRef: - description: |- - secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - description: |- - user is optional: User is the rados user name, default is admin - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - required: - - monitors - type: object - cinder: - description: |- - cinder represents a cinder volume attached and mounted on kubelets host machine. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: boolean - secretRef: - description: |- - secretRef is optional: points to a secret object containing parameters used to connect - to OpenStack. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeID: - description: |- - volumeID used to identify the volume in cinder. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - required: - - volumeID - type: object - configMap: - description: configMap represents a configMap - that should populate this volume - properties: - defaultMode: - description: |- - defaultMode is optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether the - ConfigMap or its keys must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - csi: - description: csi (Container Storage Interface) - represents ephemeral storage that is handled - by certain external CSI drivers (Beta feature). - properties: - driver: - description: |- - driver is the name of the CSI driver that handles this volume. - Consult with your admin for the correct name as registered in the cluster. - type: string - fsType: - description: |- - fsType to mount. Ex. "ext4", "xfs", "ntfs". - If not provided, the empty value is passed to the associated CSI driver - which will determine the default filesystem to apply. - type: string - nodePublishSecretRef: - description: |- - nodePublishSecretRef is a reference to the secret object containing - sensitive information to pass to the CSI driver to complete the CSI - NodePublishVolume and NodeUnpublishVolume calls. - This field is optional, and may be empty if no secret is required. 
If the - secret object contains more than one secret, all secret references are passed. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - readOnly: - description: |- - readOnly specifies a read-only configuration for the volume. - Defaults to false (read/write). - type: boolean - volumeAttributes: - additionalProperties: - type: string - description: |- - volumeAttributes stores driver-specific properties that are passed to the CSI - driver. Consult your driver's documentation for supported values. - type: object - required: - - driver - type: object - downwardAPI: - description: downwardAPI represents downward API - about the pod that should populate this volume - properties: - defaultMode: - description: |- - Optional: mode bits to use on created files by default. Must be a - Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: Items is a list of downward API - volume file - items: - description: DownwardAPIVolumeFile represents - information to create the file containing - the pod field - properties: - fieldRef: - description: 'Required: Selects a field - of the pod: only annotations, labels, - name, namespace and uid are supported.' - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". 
- type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path is the - relative path name of the file to - be created. Must not be absolute or - contain the ''..'' path. Must be utf-8 - encoded. The first item of the relative - path must not start with ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - emptyDir: - description: |- - emptyDir represents a temporary directory that shares a pod's lifetime. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - properties: - medium: - description: |- - medium represents what type of storage medium should back this directory. 
- The default is "" which means to use the node's default medium. - Must be an empty string (default) or Memory. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - type: string - sizeLimit: - anyOf: - - type: integer - - type: string - description: |- - sizeLimit is the total amount of local storage required for this EmptyDir volume. - The size limit is also applicable for memory medium. - The maximum usage on memory medium EmptyDir would be the minimum value between - the SizeLimit specified here and the sum of memory limits of all containers in a pod. - The default is nil which means that the limit is undefined. - More info: https://kubernetes. - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - ephemeral: - description: |- - ephemeral represents a volume that is handled by a cluster storage driver. - The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, - and deleted when the pod is removed. - properties: - volumeClaimTemplate: - description: |- - Will be used to create a stand-alone PVC to provision the volume. - The pod in which this EphemeralVolumeSource is embedded will be the - owner of the PVC, i.e. the PVC will be deleted together with the - pod. The name of the PVC will be `-` where - `` is the name from the `PodSpec.Volumes` array - entry. - properties: - metadata: - description: |- - May contain labels and annotations that will be copied into the PVC - when creating it. No other fields are allowed and will be rejected during - validation. 
- properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - The specification for the PersistentVolumeClaim. The entire content is - copied unchanged into the PVC that gets created from this - template. The same fields as in a PersistentVolumeClaim - are also valid here. - properties: - accessModes: - description: |- - accessModes contains the desired access modes the volume should have. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 - items: - type: string - type: array - x-kubernetes-list-type: atomic - dataSource: - description: |- - dataSource field can be used to specify either: - * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) - * An existing PVC (PersistentVolumeClaim) - If the provisioner or an external controller can support the specified data source, - it will create a new volume based on the contents of the specified data source. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - description: |- - dataSourceRef specifies the object from which to populate the volume with data, if a non-empty - volume is desired. This may be any object from a non-empty API group (non - core object) or a PersistentVolumeClaim object. 
- When this field is specified, volume binding will only succeed if the type of - the specified object matches some installed volume populator or dynamic - provisioner. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - namespace: - description: |- - Namespace is the namespace of resource being referenced - Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. - (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. - type: string - required: - - kind - - name - type: object - resources: - description: |- - resources represents the minimum resources the volume should have. - If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements - that are lower than previous value but must still be higher than capacity recorded in the - status field of the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. 
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - selector: - description: selector is a label query - over volumes to consider for binding. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - description: |- - storageClassName is the name of the StorageClass required by the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 - type: string - volumeAttributesClassName: - description: |- - volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. - If specified, the CSI driver will create or update the volume with the attributes defined - in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. - type: string - volumeMode: - description: |- - volumeMode defines what type of volume is required by the claim. - Value of Filesystem is implied when not included in claim spec. - type: string - volumeName: - description: volumeName is the binding - reference to the PersistentVolume - backing this claim. - type: string - type: object - required: - - spec - type: object - type: object - fc: - description: fc represents a Fibre Channel resource - that is attached to a kubelet's host machine - and then exposed to the pod. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - lun: - description: 'lun is Optional: FC target lun - number' - format: int32 - type: integer - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- type: boolean - targetWWNs: - description: 'targetWWNs is Optional: FC target - worldwide names (WWNs)' - items: - type: string - type: array - x-kubernetes-list-type: atomic - wwids: - description: |- - wwids Optional: FC volume world wide identifiers (wwids) - Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - flexVolume: - description: |- - flexVolume represents a generic volume resource that is - provisioned/attached using an exec based plugin. - properties: - driver: - description: driver is the name of the driver - to use for this volume. - type: string - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. - type: string - options: - additionalProperties: - type: string - description: 'options is Optional: this field - holds extra command options if any.' - type: object - readOnly: - description: |- - readOnly is Optional: defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef is Optional: secretRef is reference to the secret object containing - sensitive information to pass to the plugin scripts. This may be - empty if no secret object is specified. If the secret object - contains more than one secret, all secrets are passed to the plugin - scripts. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - required: - - driver - type: object - flocker: - description: flocker represents a Flocker volume - attached to a kubelet's host machine. This depends - on the Flocker control service being running - properties: - datasetName: - description: |- - datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker - should be considered as deprecated - type: string - datasetUUID: - description: datasetUUID is the UUID of the - dataset. This is unique identifier of a - Flocker dataset - type: string - type: object - gcePersistentDisk: - description: |- - gcePersistentDisk represents a GCE Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - properties: - fsType: - description: |- - fsType is filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - format: int32 - type: integer - pdName: - description: |- - pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: boolean - required: - - pdName - type: object - gitRepo: - description: |- - gitRepo represents a git repository at a particular revision. - DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an - EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir - into the Pod's container. - properties: - directory: - description: |- - directory is the target directory name. - Must not contain or start with '..'. If '.' is supplied, the volume directory will be the - git repository. Otherwise, if specified, the volume will contain the git repository in - the subdirectory with the given name. - type: string - repository: - description: repository is the URL - type: string - revision: - description: revision is the commit hash for - the specified revision. - type: string - required: - - repository - type: object - glusterfs: - description: |- - glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/glusterfs/README.md - properties: - endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - path: - description: |- - path is the Glusterfs volume path. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - readOnly: - description: |- - readOnly here will force the Glusterfs volume to be mounted with read-only permissions. - Defaults to false. 
- More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: boolean - required: - - endpoints - - path - type: object - hostPath: - description: |- - hostPath represents a pre-existing file or directory on the host - machine that is directly exposed to the container. This is generally - used for system agents or other privileged things that are allowed - to see the host machine. Most containers will NOT need this. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - properties: - path: - description: |- - path of the directory on the host. - If the path is a symlink, it will follow the link to the real path. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - type: - description: |- - type for HostPath Volume - Defaults to "" - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - required: - - path - type: object - image: - description: |- - image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. - The volume is resolved at pod startup depending on which PullPolicy value is provided: - - - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - - Never: the kubelet never pulls the reference and only uses a local image or artifact. - properties: - pullPolicy: - description: |- - Policy for pulling OCI objects. Possible values are: - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. - type: string - reference: - description: |- - Required: Image or artifact reference to be used. - Behaves in the same way as pod.spec.containers[*].image. 
- Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets. - More info: https://kubernetes. - type: string - type: object - iscsi: - description: |- - iscsi represents an ISCSI Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md - properties: - chapAuthDiscovery: - description: chapAuthDiscovery defines whether - support iSCSI Discovery CHAP authentication - type: boolean - chapAuthSession: - description: chapAuthSession defines whether - support iSCSI Session CHAP authentication - type: boolean - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi - type: string - initiatorName: - description: |- - initiatorName is the custom iSCSI Initiator Name. - If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface - : will be created for the connection. - type: string - iqn: - description: iqn is the target iSCSI Qualified - Name. - type: string - iscsiInterface: - default: default - description: |- - iscsiInterface is the interface Name that uses an iSCSI transport. - Defaults to 'default' (tcp). - type: string - lun: - description: lun represents iSCSI Target Lun - number. - format: int32 - type: integer - portals: - description: |- - portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - items: - type: string - type: array - x-kubernetes-list-type: atomic - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. 
- Defaults to false. - type: boolean - secretRef: - description: secretRef is the CHAP Secret - for iSCSI target and initiator authentication - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - targetPortal: - description: |- - targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - type: string - required: - - iqn - - lun - - targetPortal - type: object - name: - description: |- - name of the volume. - Must be a DNS_LABEL and unique within the pod. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - nfs: - description: |- - nfs represents an NFS mount on the host that shares a pod's lifetime - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - properties: - path: - description: |- - path that is exported by the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - readOnly: - description: |- - readOnly here will force the NFS export to be mounted with read-only permissions. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: boolean - server: - description: |- - server is the hostname or IP address of the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - required: - - path - - server - type: object - persistentVolumeClaim: - description: |- - persistentVolumeClaimVolumeSource represents a reference to a - PersistentVolumeClaim in the same namespace. 
- More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - properties: - claimName: - description: |- - claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - type: string - readOnly: - description: |- - readOnly Will force the ReadOnly setting in VolumeMounts. - Default false. - type: boolean - required: - - claimName - type: object - photonPersistentDisk: - description: photonPersistentDisk represents a - PhotonController persistent disk attached and - mounted on kubelets host machine - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - pdID: - description: pdID is the ID that identifies - Photon Controller persistent disk - type: string - required: - - pdID - type: object - portworxVolume: - description: portworxVolume represents a portworx - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fSType represents the filesystem type to mount - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - volumeID: - description: volumeID uniquely identifies - a Portworx volume - type: string - required: - - volumeID - type: object - projected: - description: projected items for all in one resources - secrets, configmaps, and downward API - properties: - defaultMode: - description: |- - defaultMode are the mode bits used to set permissions on created files by default. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Directories within the path are not affected by this setting. - format: int32 - type: integer - sources: - description: |- - sources is the list of volume projections. Each entry in this list - handles one source. - items: - description: |- - Projection that may be projected along with other supported volume types. - Exactly one of these fields must be set. - properties: - clusterTrustBundle: - description: |- - ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field - of ClusterTrustBundle objects in an auto-updating file. - - Alpha, gated by the ClusterTrustBundleProjection feature gate. - - ClusterTrustBundle objects can either be selected by name, or by the - combination of signer name and a label selector. - properties: - labelSelector: - description: |- - Select all ClusterTrustBundles that match this label selector. Only has - effect if signerName is set. Mutually-exclusive with name. If unset, - interpreted as "match nothing". If set but empty, interpreted as "match - everything". - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. 
This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - name: - description: |- - Select a single ClusterTrustBundle by object name. Mutually-exclusive - with signerName and labelSelector. - type: string - optional: - description: |- - If true, don't block pod startup if the referenced ClusterTrustBundle(s) - aren't available. If using name, then the named ClusterTrustBundle is - allowed not to exist. If using signerName, then the combination of - signerName and labelSelector is allowed to match zero - ClusterTrustBundles. - type: boolean - path: - description: Relative path from - the volume root to write the bundle. - type: string - signerName: - description: |- - Select all ClusterTrustBundles that match this signer name. - Mutually-exclusive with name. The contents of all selected - ClusterTrustBundles will be unified and deduplicated. - type: string - required: - - path - type: object - configMap: - description: configMap information about - the configMap data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key - to a path within a volume. 
- properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether - the ConfigMap or its keys must - be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - downwardAPI: - description: downwardAPI information - about the downwardAPI data to project - properties: - items: - description: Items is a list of - DownwardAPIVolume file - items: - description: DownwardAPIVolumeFile - represents information to create - the file containing the pod - field - properties: - fieldRef: - description: 'Required: Selects - a field of the pod: only - annotations, labels, name, - namespace and uid are supported.' - properties: - apiVersion: - description: Version of - the schema the FieldPath - is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the - field to select in the - specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path - is the relative path name - of the file to be created. - Must not be absolute or - contain the ''..'' path. - Must be utf-8 encoded. The - first item of the relative - path must not start with - ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container - name: required for volumes, - optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies - the output format of - the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: - resource to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - secret: - description: secret information about - the secret data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. 
- items: - description: Maps a string key - to a path within a volume. - properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional field specify - whether the Secret or its key - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - serviceAccountToken: - description: serviceAccountToken is - information about the serviceAccountToken - data to project - properties: - audience: - description: |- - audience is the intended audience of the token. A recipient of a token - must identify itself with an identifier specified in the audience of the - token, and otherwise should reject the token. The audience defaults to the - identifier of the apiserver. - type: string - expirationSeconds: - description: |- - expirationSeconds is the requested duration of validity of the service - account token. 
As the token approaches expiration, the kubelet volume - plugin will proactively rotate the service account token. The kubelet will - start trying to rotate the token if the token is older than 80 percent of - its time to live or if the token is older than 24 hours.Defaults to 1 hour - and must be at least 10 minutes. - format: int64 - type: integer - path: - description: |- - path is the path relative to the mount point of the file to project the - token into. - type: string - required: - - path - type: object - type: object - type: array - x-kubernetes-list-type: atomic - type: object - quobyte: - description: quobyte represents a Quobyte mount - on the host that shares a pod's lifetime - properties: - group: - description: |- - group to map volume access to - Default is no group - type: string - readOnly: - description: |- - readOnly here will force the Quobyte volume to be mounted with read-only permissions. - Defaults to false. - type: boolean - registry: - description: |- - registry represents a single or multiple Quobyte Registry services - specified as a string as host:port pair (multiple entries are separated with commas) - which acts as the central registry for volumes - type: string - tenant: - description: |- - tenant owning the given Quobyte volume in the Backend - Used with dynamically provisioned Quobyte volumes, value is set by the plugin - type: string - user: - description: |- - user to map volume access to - Defaults to serivceaccount user - type: string - volume: - description: volume is a string that references - an already created Quobyte volume by name. - type: string - required: - - registry - - volume - type: object - rbd: - description: |- - rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/rbd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. 
- Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd - type: string - image: - description: |- - image is the rados image name. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - keyring: - default: /etc/ceph/keyring - description: |- - keyring is the path to key ring for RBDUser. - Default is /etc/ceph/keyring. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - monitors: - description: |- - monitors is a collection of Ceph monitors. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - pool: - default: rbd - description: |- - pool is the rados pool name. - Default is rbd. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: boolean - secretRef: - description: |- - secretRef is name of the authentication secret for RBDUser. If provided - overrides keyring. - Default is nil. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - default: admin - description: |- - user is the rados user name. - Default is admin. 
- More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - required: - - image - - monitors - type: object - scaleIO: - description: scaleIO represents a ScaleIO persistent - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - default: xfs - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". - Default is "xfs". - type: string - gateway: - description: gateway is the host address of - the ScaleIO API Gateway. - type: string - protectionDomain: - description: protectionDomain is the name - of the ScaleIO Protection Domain for the - configured storage. - type: string - readOnly: - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef references to the secret for ScaleIO user and other - sensitive information. If this is not provided, Login operation will fail. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - sslEnabled: - description: sslEnabled Flag enable/disable - SSL communication with Gateway, default - false - type: boolean - storageMode: - default: ThinProvisioned - description: |- - storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. - Default is ThinProvisioned. - type: string - storagePool: - description: storagePool is the ScaleIO Storage - Pool associated with the protection domain. 
- type: string - system: - description: system is the name of the storage - system as configured in ScaleIO. - type: string - volumeName: - description: |- - volumeName is the name of a volume already created in the ScaleIO system - that is associated with this volume source. - type: string - required: - - gateway - - secretRef - - system - type: object - secret: - description: |- - secret represents a secret that should populate this volume. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - properties: - defaultMode: - description: |- - defaultMode is Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values - for mode bits. Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items If unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. 
- type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - optional: - description: optional field specify whether - the Secret or its keys must be defined - type: boolean - secretName: - description: |- - secretName is the name of the secret in the pod's namespace to use. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - type: string - type: object - storageos: - description: storageOS represents a StorageOS - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef specifies the secret to use for obtaining the StorageOS API - credentials. If not specified, default values will be attempted. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeName: - description: |- - volumeName is the human-readable name of the StorageOS volume. Volume - names are only unique within a namespace. - type: string - volumeNamespace: - description: |- - volumeNamespace specifies the scope of the volume within StorageOS. If no - namespace is specified then the Pod's namespace will be used. This allows the - Kubernetes name scoping to be mirrored within StorageOS for tighter integration. 
- Set VolumeName to any name to override the default behaviour. - Set to "default" if you are not using namespaces within StorageOS. - type: string - type: object - vsphereVolume: - description: vsphereVolume represents a vSphere - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fsType is filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - storagePolicyID: - description: storagePolicyID is the storage - Policy Based Management (SPBM) profile ID - associated with the StoragePolicyName. - type: string - storagePolicyName: - description: storagePolicyName is the storage - Policy Based Management (SPBM) profile name. - type: string - volumePath: - description: volumePath is the path that identifies - vSphere volume vmdk - type: string - required: - - volumePath - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - required: - - containers - type: object - type: object - type: object - description: |- - `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that - specify the MPI replicas to run. - type: object - runPolicy: - description: |- - `RunPolicy` encapsulates various runtime policies of the distributed training - job, for example how to clean up resources and how long the job can stay - active. - properties: - activeDeadlineSeconds: - description: |- - Specifies the duration in seconds relative to the startTime that the job may be active - before the system tries to terminate it; value must be positive integer. - format: int64 - type: integer - backoffLimit: - description: Optional number of retries before marking this job - failed. - format: int32 - type: integer - cleanPodPolicy: - description: |- - CleanPodPolicy defines the policy to kill pods after the job completes. 
- Default to None. - type: string - managedBy: - description: |- - ManagedBy is used to indicate the controller or entity that manages a job. - The value must be either an empty, 'kubeflow.org/training-operator' or - 'kueue.x-k8s.io/multikueue'. - The training-operator reconciles a job which doesn't have this - field at all or the field value is the reserved string - 'kubeflow.org/training-operator', but delegates reconciling the job - with 'kueue.x-k8s. - type: string - schedulingPolicy: - description: SchedulingPolicy defines the policy related to scheduling, - e.g. gang-scheduling - properties: - minAvailable: - format: int32 - type: integer - minResources: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - priorityClass: - type: string - queue: - type: string - x-kubernetes-validations: - - message: spec.runPolicy.schedulingPolicy.queue is immutable - rule: self == oldSelf - scheduleTimeoutSeconds: - format: int32 - type: integer - type: object - suspend: - default: false - description: |- - suspend specifies whether the Job controller should create Pods or not. - If a Job is created with suspend set to true, no Pods are created by - the Job controller. If a Job is suspended after creation (i.e. the - flag goes from false to true), the Job controller will delete all - active Pods and PodGroups associated with this Job. - Users must design their workload to gracefully handle this. - type: boolean - ttlSecondsAfterFinished: - description: |- - TTLSecondsAfterFinished is the TTL to clean up jobs. - It may take extra ReconcilePeriod seconds for the cleanup, since - reconcile gets called periodically. - Default to infinite. - format: int32 - type: integer - type: object - slotsPerWorker: - description: |- - Specifies the number of slots per worker used in hostfile. 
- Defaults to 1. - format: int32 - type: integer - required: - - mpiReplicaSpecs - type: object - status: - description: JobStatus represents the current observed state of the training - Job. - properties: - completionTime: - description: |- - Represents time when the job was completed. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - conditions: - description: Conditions is an array of current observed job conditions. - items: - description: JobCondition describes the state of the job at a certain - point. - properties: - lastTransitionTime: - description: Last time the condition transitioned from one status - to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human readable message indicating details about - the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Type of job condition. - type: string - required: - - status - - type - type: object - type: array - lastReconcileTime: - description: |- - Represents last time when the job was reconciled. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - replicaStatuses: - additionalProperties: - description: ReplicaStatus represents the current observed state - of the replica. - properties: - active: - description: The number of actively running pods. - format: int32 - type: integer - failed: - description: The number of pods which reached phase Failed. 
- format: int32 - type: integer - labelSelector: - description: 'Deprecated: Use Selector instead' - properties: - matchExpressions: - description: matchExpressions is a list of label selector - requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - selector: - description: |- - A Selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty Selector matches all objects. A null - Selector matches no objects. - type: string - succeeded: - description: The number of pods which reached phase Succeeded. - format: int32 - type: integer - type: object - description: |- - ReplicaStatuses is map of ReplicaType and ReplicaStatus, - specifies the status of each replica. 
- type: object - startTime: - description: |- - Represents time when the job was acknowledged by the job controller. - It is not guaranteed to be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} diff --git a/manifests/base/crds/kubeflow.org_paddlejobs.yaml b/manifests/base/crds/kubeflow.org_paddlejobs.yaml deleted file mode 100644 index fbcf3f2037..0000000000 --- a/manifests/base/crds/kubeflow.org_paddlejobs.yaml +++ /dev/null @@ -1,8394 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.5 - name: paddlejobs.kubeflow.org -spec: - group: kubeflow.org - names: - kind: PaddleJob - listKind: PaddleJobList - plural: paddlejobs - singular: paddlejob - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .status.conditions[-1:].type - name: State - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1 - schema: - openAPIV3Schema: - description: PaddleJob Represents a PaddleJob resource. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of the desired state of the PaddleJob. - properties: - elasticPolicy: - description: ElasticPolicy holds the elastic policy for paddle job. - properties: - maxReplicas: - description: upper limit for the number of pods that can be set - by the autoscaler; cannot be smaller than MinReplicas, defaults - to null. - format: int32 - type: integer - maxRestarts: - description: MaxRestarts is the limit for restart times of pods - in elastic mode. - format: int32 - type: integer - metrics: - description: |- - Metrics contains the specifications which are used to calculate the - desired replica count (the maximum replica count across all metrics will - be used). The desired replica count is calculated with multiplying the - ratio between the target value and the current value by the current - number of pods. Ergo, metrics used must decrease as the pod count is - increased, and vice-versa. - items: - description: |- - MetricSpec specifies how to scale based on a single metric - (only `type` and one other matching field should be set at once). - properties: - containerResource: - description: |- - containerResource refers to a resource metric (such as those specified in - requests and limits) known to Kubernetes describing a single container in - each pod of the current scale target (e.g. CPU or memory). Such metrics are - built in to Kubernetes, and have special scaling options on top of those - available to normal per-pod metrics using the "pods" source. - properties: - container: - description: container is the name of the container - in the pods of the scaling target - type: string - name: - description: name is the name of the resource in question. 
- type: string - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - container - - name - - target - type: object - external: - description: |- - external refers to a global metric that is not associated - with any Kubernetes object. It allows autoscaling based on information - coming from components running outside of cluster - (for example length of queue in cloud messaging service, or - QPS from loadbalancer running outside of cluster). 
- properties: - metric: - description: metric identifies the target metric by - name and selector - properties: - name: - description: name is the name of the given metric - type: string - selector: - description: |- - selector is the string-encoded form of a standard kubernetes label selector for the given metric - When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. - When unset, just the metricName will be used to gather metrics. - properties: - matchExpressions: - description: matchExpressions is a list of label - selector requirements. The requirements are - ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. 
- type: object - type: object - x-kubernetes-map-type: atomic - required: - - name - type: object - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - metric - - target - type: object - object: - description: |- - object refers to a metric describing a single kubernetes object - (for example, hits-per-second on an Ingress object). 
- properties: - describedObject: - description: describedObject specifies the descriptions - of a object,such as kind,name apiVersion - properties: - apiVersion: - description: apiVersion is the API version of the - referent - type: string - kind: - description: 'kind is the kind of the referent; - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' - type: string - name: - description: 'name is the name of the referent; - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' - type: string - required: - - kind - - name - type: object - metric: - description: metric identifies the target metric by - name and selector - properties: - name: - description: name is the name of the given metric - type: string - selector: - description: |- - selector is the string-encoded form of a standard kubernetes label selector for the given metric - When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. - When unset, just the metricName will be used to gather metrics. - properties: - matchExpressions: - description: matchExpressions is a list of label - selector requirements. The requirements are - ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - required: - - name - type: object - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). 
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - describedObject - - metric - - target - type: object - pods: - description: |- - pods refers to a metric describing each pod in the current scale target - (for example, transactions-processed-per-second). The values will be - averaged together before being compared to the target value. - properties: - metric: - description: metric identifies the target metric by - name and selector - properties: - name: - description: name is the name of the given metric - type: string - selector: - description: |- - selector is the string-encoded form of a standard kubernetes label selector for the given metric - When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. - When unset, just the metricName will be used to gather metrics. - properties: - matchExpressions: - description: matchExpressions is a list of label - selector requirements. The requirements are - ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - required: - - name - type: object - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). 
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - metric - - target - type: object - resource: - description: |- - resource refers to a resource metric (such as those specified in - requests and limits) known to Kubernetes describing each pod in the - current scale target (e.g. CPU or memory). Such metrics are built in to - Kubernetes, and have special scaling options on top of those available - to normal per-pod metrics using the "pods" source. - properties: - name: - description: name is the name of the resource in question. - type: string - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). 
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - name - - target - type: object - type: - description: |- - type is the type of metric source. It should be one of "ContainerResource", "External", - "Object", "Pods" or "Resource", each mapping to a matching field in the object. - Note: "ContainerResource" type is available on when the feature-gate - HPAContainerMetrics is enabled - type: string - required: - - type - type: object - type: array - minReplicas: - description: |- - minReplicas is the lower limit for the number of replicas to which the training job - can scale down. It defaults to null. - format: int32 - type: integer - type: object - paddleReplicaSpecs: - additionalProperties: - description: ReplicaSpec is a description of the replica - properties: - replicas: - description: |- - Replicas is the desired number of replicas of the given template. - If unspecified, defaults to 1. - format: int32 - type: integer - restartPolicy: - description: |- - Restart policy for all replicas within the job. - One of Always, OnFailure, Never and ExitCode. - Default to Never. - type: string - template: - description: |- - Template is the object that describes the pod that - will be created for this replica. RestartPolicy in PodTemplateSpec - will be overide by RestartPolicy in ReplicaSpec - properties: - metadata: - description: |- - Standard object's metadata. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - Specification of the desired behavior of the pod. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status - properties: - activeDeadlineSeconds: - description: |- - Optional duration in seconds the pod may be active on the node relative to - StartTime before the system will actively try to mark it failed and kill associated containers. - Value must be a positive integer. - format: int64 - type: integer - affinity: - description: If specified, the pod's scheduling constraints - properties: - nodeAffinity: - description: Describes node affinity scheduling - rules for the pod. - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: |- - An empty preferred scheduling term matches all objects with implicit weight 0 - (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). - properties: - preference: - description: A node selector term, associated - with the corresponding weight. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. 
If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - weight: - description: Weight associated with matching - the corresponding nodeSelectorTerm, - in the range 1-100. 
- format: int32 - type: integer - required: - - preference - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to an update), the system - may or may not try to eventually evict the pod from its node. - properties: - nodeSelectorTerms: - description: Required. A list of node selector - terms. The terms are ORed. - items: - description: |- - A null or empty node selector term matches no objects. The requirements of - them are ANDed. - The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-type: atomic - required: - - nodeSelectorTerms - type: object - x-kubernetes-map-type: atomic - type: object - podAffinity: - description: Describes pod affinity scheduling rules - (e.g. co-locate this pod in the same node, zone, - etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. 
- for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. 
- type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. 
- type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. 
- format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. 
- The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - podAntiAffinity: - description: Describes pod anti-affinity scheduling - rules (e.g. avoid putting this pod in the same - node, zone, etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the anti-affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. 
- type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. 
- The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the anti-affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the anti-affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. 
- properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - type: object - automountServiceAccountToken: - description: AutomountServiceAccountToken indicates - whether a service account token should be automatically - mounted. - type: boolean - containers: - description: |- - List of containers belonging to the pod. - Containers cannot currently be added or removed. - There must be at least one container in a Pod. - Cannot be updated. - items: - description: A single application container that you - want to run within a pod. 
- properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. 
- type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. 
- More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. 
- format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. 
- format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. 
- format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. 
- For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. 
- x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. 
- type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - dnsConfig: - description: |- - Specifies the DNS parameters of a pod. - Parameters specified here will be merged to the generated DNS - configuration based on DNSPolicy. - properties: - nameservers: - description: |- - A list of DNS name server IP addresses. - This will be appended to the base nameservers generated from DNSPolicy. - Duplicated nameservers will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - options: - description: |- - A list of DNS resolver options. - This will be merged with the base options generated from DNSPolicy. - Duplicated entries will be removed. Resolution options given in Options - will override those that appear in the base DNSPolicy. 
- items: - description: PodDNSConfigOption defines DNS resolver - options of a pod. - properties: - name: - description: Required. - type: string - value: - type: string - type: object - type: array - x-kubernetes-list-type: atomic - searches: - description: |- - A list of DNS search domains for host-name lookup. - This will be appended to the base search paths generated from DNSPolicy. - Duplicated search paths will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - dnsPolicy: - description: |- - Set DNS policy for the pod. - Defaults to "ClusterFirst". - Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. - DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. - To have DNS options set along with hostNetwork, you have to specify DNS policy - explicitly to 'ClusterFirstWithHostNet'. - type: string - enableServiceLinks: - description: |- - EnableServiceLinks indicates whether information about services should be injected into pod's - environment variables, matching the syntax of Docker links. - Optional: Defaults to true. - type: boolean - ephemeralContainers: - description: |- - List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing - pod to perform user-initiated actions such as debugging. This list cannot be specified when - creating a pod, and it cannot be modified by updating the pod spec. In order to add an - ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. - items: - description: |- - An EphemeralContainer is a temporary container that you may add to an existing Pod for - user-initiated activities such as debugging. Ephemeral containers have no resource or - scheduling guarantees, and they will not be restarted when they exit or when a Pod is - removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the - Pod to exceed its resource allocation. 
- properties: - args: - description: |- - Arguments to the entrypoint. - The image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. 
- type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: Lifecycle is not allowed for ephemeral - containers. 
- properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. 
- Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. 
- properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. 
- properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the ephemeral container specified as a DNS_LABEL. - This name must be unique among all containers, init containers and ephemeral containers. - type: string - ports: - description: Ports are not allowed for ephemeral - containers. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. 
Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. 
You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources - already allocated to the pod. - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. 
- - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - Restart policy for the container to manage the restart behavior of each - container within a pod. 
- This may only be set for init containers. You cannot set this field on - ephemeral containers. - type: string - securityContext: - description: |- - Optional: SecurityContext defines the security options the ephemeral container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - targetContainerName: - description: |- - If set, the name of the container from PodSpec that this ephemeral container targets. - The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. - If not set then the ephemeral container uses the namespaces configured in the Pod spec. - - The container runtime must implement support for this feature. - type: string - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. 
- type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). 
- type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. - - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - hostAliases: - description: |- - HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts - file if specified. - items: - description: |- - HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the - pod's hosts file. 
- properties: - hostnames: - description: Hostnames for the above IP address. - items: - type: string - type: array - x-kubernetes-list-type: atomic - ip: - description: IP address of the host file entry. - type: string - required: - - ip - type: object - type: array - x-kubernetes-list-map-keys: - - ip - x-kubernetes-list-type: map - hostIPC: - description: |- - Use the host's ipc namespace. - Optional: Default to false. - type: boolean - hostNetwork: - description: |- - Host networking requested for this pod. Use the host's network namespace. - If this option is set, the ports that will be used must be specified. - Default to false. - type: boolean - hostPID: - description: |- - Use the host's pid namespace. - Optional: Default to false. - type: boolean - hostUsers: - description: |- - Use the host's user namespace. - Optional: Default to true. - If set to true or not present, the pod will be run in the host user namespace, useful - for when the pod needs a feature only available to the host user namespace, such as - loading a kernel module with CAP_SYS_MODULE. - When set to false, a new userns is created for the pod. - type: boolean - hostname: - description: |- - Specifies the hostname of the Pod - If not specified, the pod's hostname will be set to a system-defined value. - type: string - imagePullSecrets: - description: |- - ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. - If specified, these secrets will be passed to individual puller implementations for them to use. - More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod - items: - description: |- - LocalObjectReference contains enough information to let you locate the - referenced object inside the same namespace. - properties: - name: - default: "" - description: |- - Name of the referent. 
- This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - initContainers: - description: |- - List of initialization containers belonging to the pod. - Init containers are executed in order prior to containers being started. If any - init container fails, the pod is considered to have failed and is handled according - to its restartPolicy. The name for an init container or normal container must be - unique among all containers. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. 
- items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. 
All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. 
- type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. 
The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. 
- type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. 
Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. 
Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. - For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. 
- type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - nodeName: - description: |- - NodeName indicates in which node this pod is scheduled. - If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName. - Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod. - This field should not be used to express a desire for the pod to be scheduled on a specific node. - https://kubernetes. - type: string - nodeSelector: - additionalProperties: - type: string - description: |- - NodeSelector is a selector which must be true for the pod to fit on a node. - Selector which must match a node's labels for the pod to be scheduled on that node. 
- More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ - type: object - x-kubernetes-map-type: atomic - os: - description: |- - Specifies the OS of the containers in the pod. - Some pod and container fields are restricted if this is set. - - If the OS field is set to linux, the following fields must be unset: - -securityContext.windowsOptions - - If the OS field is set to windows, following fields must be unset: - - spec.hostPID - - spec.hostIPC - - spec.hostUsers - - spec.securityContext.appArmorProfile - - spec.securityContext. - properties: - name: - description: |- - Name is the name of the operating system. The currently supported values are linux and windows. - Additional value may be defined in future and can be one of: - https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration - Clients should expect to handle additional values and treat unrecognized values in this field as os: null - type: string - required: - - name - type: object - overhead: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. - This field will be autopopulated at admission time by the RuntimeClass admission controller. If - the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. - The RuntimeClass admission controller will reject Pod create requests which have the overhead already - set. - type: object - preemptionPolicy: - description: |- - PreemptionPolicy is the Policy for preempting pods with lower priority. - One of Never, PreemptLowerPriority. - Defaults to PreemptLowerPriority if unset. - type: string - priority: - description: |- - The priority value. 
Various system components use this field to find the - priority of the pod. When Priority Admission Controller is enabled, it - prevents users from setting this field. The admission controller populates - this field from PriorityClassName. - The higher the value, the higher the priority. - format: int32 - type: integer - priorityClassName: - description: |- - If specified, indicates the pod's priority. "system-node-critical" and - "system-cluster-critical" are two special keywords which indicate the - highest priorities with the former being the highest priority. Any other - name must be defined by creating a PriorityClass object with that name. - If not specified, the pod priority will be default or zero if there is no - default. - type: string - readinessGates: - description: |- - If specified, all readiness gates will be evaluated for pod readiness. - A pod is ready when all its containers are ready AND - all conditions specified in the readiness gates have status equal to "True" - More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates - items: - description: PodReadinessGate contains the reference - to a pod condition - properties: - conditionType: - description: ConditionType refers to a condition - in the pod's condition list with matching type. - type: string - required: - - conditionType - type: object - type: array - x-kubernetes-list-type: atomic - resourceClaims: - description: |- - ResourceClaims defines which ResourceClaims must be allocated - and reserved before the Pod is allowed to start. The resources - will be made available to those containers which consume them - by name. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. - items: - description: |- - PodResourceClaim references exactly one ResourceClaim, either directly - or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim - for the pod. 
- - It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. - Containers that need access to the ResourceClaim reference it with this name. - properties: - name: - description: |- - Name uniquely identifies this resource claim inside the pod. - This must be a DNS_LABEL. - type: string - resourceClaimName: - description: |- - ResourceClaimName is the name of a ResourceClaim object in the same - namespace as this pod. - - Exactly one of ResourceClaimName and ResourceClaimTemplateName must - be set. - type: string - resourceClaimTemplateName: - description: |- - ResourceClaimTemplateName is the name of a ResourceClaimTemplate - object in the same namespace as this pod. - - The template will be used to create a new ResourceClaim, which will - be bound to this pod. When this pod is deleted, the ResourceClaim - will also be deleted. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - restartPolicy: - description: |- - Restart policy for all containers within the pod. - One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. - Default to Always. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy - type: string - runtimeClassName: - description: |- - RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used - to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. - If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an - empty definition that uses the default runtime handler. - More info: https://git.k8s. - type: string - schedulerName: - description: |- - If specified, the pod will be dispatched by specified scheduler. - If not specified, the pod will be dispatched by default scheduler. 
- type: string - schedulingGates: - description: |- - SchedulingGates is an opaque list of values that if specified will block scheduling the pod. - If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the - scheduler will not attempt to schedule the pod. - - SchedulingGates can only be set at pod creation time, and be removed only afterwards. - items: - description: PodSchedulingGate is associated to a - Pod to guard its scheduling. - properties: - name: - description: |- - Name of the scheduling gate. - Each scheduling gate must have a unique name field. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - securityContext: - description: |- - SecurityContext holds pod-level security attributes and common container settings. - Optional: Defaults to empty. See type description for default values of each field. - properties: - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - fsGroup: - description: |- - A special supplemental group that applies to all containers in a pod. - Some volume types allow the Kubelet to change the ownership of that volume - to be owned by the pod: - - 1. The owning GID will be the FSGroup - 2. 
The setgid bit is set (new files created in the volume will be owned by FSGroup) - 3. - format: int64 - type: integer - fsGroupChangePolicy: - description: |- - fsGroupChangePolicy defines behavior of changing ownership and permission of the volume - before being exposed inside Pod. This field will only apply to - volume types which support fsGroup based ownership(and permissions). - It will have no effect on ephemeral volume types such as: secret, configmaps - and emptydir. - Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. - type: string - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in SecurityContext. - type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to all containers. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in SecurityContext. 
If set in - both SecurityContext and PodSecurityContext, the value specified in SecurityContext - takes precedence for that container. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that - applies to the container. - type: string - role: - description: Role is a SELinux role label that - applies to the container. - type: string - type: - description: Type is a SELinux type label that - applies to the container. - type: string - user: - description: User is a SELinux user label that - applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. - Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - supplementalGroups: - description: |- - A list of groups applied to the first process run in each container, in - addition to the container's primary GID and fsGroup (if specified). If - the SupplementalGroupsPolicy feature is enabled, the - supplementalGroupsPolicy field determines whether these are in addition - to or instead of any group memberships defined in the container image. 
- items: - format: int64 - type: integer - type: array - x-kubernetes-list-type: atomic - supplementalGroupsPolicy: - description: |- - Defines how supplemental groups of the first container processes are calculated. - Valid values are "Merge" and "Strict". If not specified, "Merge" is used. - (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled - and the container runtime must implement support for this feature. - Note that this field cannot be set when spec.os.name is windows. - type: string - sysctls: - description: |- - Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported - sysctls (by the container runtime) might fail to launch. - Note that this field cannot be set when spec.os.name is windows. - items: - description: Sysctl defines a kernel parameter - to be set - properties: - name: - description: Name of a property to set - type: string - value: - description: Value of a property to set - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options within a container's SecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. 
- All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - serviceAccount: - description: |- - DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. - Deprecated: Use serviceAccountName instead. - type: string - serviceAccountName: - description: |- - ServiceAccountName is the name of the ServiceAccount to use to run this pod. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ - type: string - setHostnameAsFQDN: - description: |- - If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). - In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). - type: boolean - shareProcessNamespace: - description: |- - Share a single process namespace between all of the containers in a pod. - When this is set containers will be able to view and signal processes from other containers - in the same pod, and the first process in each container will not be assigned PID 1. - HostPID and ShareProcessNamespace cannot both be set. - Optional: Default to false. - type: boolean - subdomain: - description: |- - If specified, the fully qualified Pod hostname will be "...svc.". - If not specified, the pod will not have a domainname at all. 
- type: string - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - If this value is nil, the default grace period will be used instead. - format: int64 - type: integer - tolerations: - description: If specified, the pod's tolerations. - items: - description: |- - The pod this Toleration is attached to tolerates any taint that matches - the triple using the matching operator . - properties: - effect: - description: |- - Effect indicates the taint effect to match. Empty means match all taint effects. - When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: |- - Key is the taint key that the toleration applies to. Empty means match all taint keys. - If the key is empty, operator must be Exists; this combination means to match all values and all keys. - type: string - operator: - description: |- - Operator represents a key's relationship to the value. - Valid operators are Exists and Equal. Defaults to Equal. - Exists is equivalent to wildcard for value, so that a pod can - tolerate all taints of a particular category. - type: string - tolerationSeconds: - description: |- - TolerationSeconds represents the period of time the toleration (which must be - of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, - it is not set, which means tolerate the taint forever (do not evict). Zero and - negative values will be treated as 0 (evict immediately) by the system. - format: int64 - type: integer - value: - description: |- - Value is the taint value the toleration matches to. - If the operator is Exists, the value should be empty, otherwise just a regular string. 
- type: string - type: object - type: array - x-kubernetes-list-type: atomic - topologySpreadConstraints: - description: |- - TopologySpreadConstraints describes how a group of pods ought to spread across topology - domains. Scheduler will schedule pods in a way which abides by the constraints. - All topologySpreadConstraints are ANDed. - items: - description: TopologySpreadConstraint specifies how - to spread matching pods among the given topology. - properties: - labelSelector: - description: |- - LabelSelector is used to find matching pods. - Pods that match this label selector are counted to determine the number of pods - in their corresponding topology domain. - properties: - matchExpressions: - description: matchExpressions is a list of - label selector requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". 
The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select the pods over which - spreading will be calculated. The keys are used to lookup values from the - incoming pod labels, those key-value labels are ANDed with labelSelector - to select the group of existing pods over which spreading will be calculated - for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. - items: - type: string - type: array - x-kubernetes-list-type: atomic - maxSkew: - description: |- - MaxSkew describes the degree to which pods may be unevenly distributed. - When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference - between the number of matching pods in the target topology and the global minimum. - The global minimum is the minimum number of matching pods in an eligible domain - or zero if the number of eligible domains is less than MinDomains. - format: int32 - type: integer - minDomains: - description: |- - MinDomains indicates a minimum number of eligible domains. - When the number of eligible domains with matching topology keys is less than minDomains, - Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. - And when the number of eligible domains with matching topology keys equals or greater than minDomains, - this value has no effect on scheduling. - format: int32 - type: integer - nodeAffinityPolicy: - description: |- - NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector - when calculating pod topology spread skew. Options are: - - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. - - If this value is nil, the behavior is equivalent to the Honor policy. 
- type: string - nodeTaintsPolicy: - description: |- - NodeTaintsPolicy indicates how we will treat node taints when calculating - pod topology spread skew. Options are: - - Honor: nodes without taints, along with tainted nodes for which the incoming pod - has a toleration, are included. - - Ignore: node taints are ignored. All nodes are included. - - If this value is nil, the behavior is equivalent to the Ignore policy. - type: string - topologyKey: - description: |- - TopologyKey is the key of node labels. Nodes that have a label with this key - and identical values are considered to be in the same topology. - We consider each as a "bucket", and try to put balanced number - of pods into each bucket. - We define a domain as a particular instance of a topology. - type: string - whenUnsatisfiable: - description: |- - WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy - the spread constraint. - - DoNotSchedule (default) tells the scheduler not to schedule it. - - ScheduleAnyway tells the scheduler to schedule the pod in any location, - but giving higher precedence to topologies that would help reduce the - skew. - type: string - required: - - maxSkew - - topologyKey - - whenUnsatisfiable - type: object - type: array - x-kubernetes-list-map-keys: - - topologyKey - - whenUnsatisfiable - x-kubernetes-list-type: map - volumes: - description: |- - List of volumes that can be mounted by containers belonging to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes - items: - description: Volume represents a named volume in a - pod that may be accessed by any container in the - pod. - properties: - awsElasticBlockStore: - description: |- - awsElasticBlockStore represents an AWS Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - format: int32 - type: integer - readOnly: - description: |- - readOnly value true will force the readOnly setting in VolumeMounts. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: boolean - volumeID: - description: |- - volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - required: - - volumeID - type: object - azureDisk: - description: azureDisk represents an Azure Data - Disk mount on the host and bind mount to the - pod. - properties: - cachingMode: - description: 'cachingMode is the Host Caching - mode: None, Read Only, Read Write.' - type: string - diskName: - description: diskName is the Name of the data - disk in the blob storage - type: string - diskURI: - description: diskURI is the URI of data disk - in the blob storage - type: string - fsType: - default: ext4 - description: |- - fsType is Filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
- type: string - kind: - description: 'kind expected values are Shared: - multiple blob disks per storage account Dedicated: - single blob disk per storage account Managed: - azure managed data disk (only in managed - availability set). defaults to shared' - type: string - readOnly: - default: false - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - required: - - diskName - - diskURI - type: object - azureFile: - description: azureFile represents an Azure File - Service mount on the host and bind mount to - the pod. - properties: - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretName: - description: secretName is the name of secret - that contains Azure Storage Account Name - and Key - type: string - shareName: - description: shareName is the azure share - Name - type: string - required: - - secretName - - shareName - type: object - cephfs: - description: cephFS represents a Ceph FS mount - on the host that shares a pod's lifetime - properties: - monitors: - description: |- - monitors is Required: Monitors is a collection of Ceph monitors - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - path: - description: 'path is Optional: Used as the - mounted root, rather than the full Ceph - tree, default is /' - type: string - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: boolean - secretFile: - description: |- - secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - secretRef: - description: |- - secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - description: |- - user is optional: User is the rados user name, default is admin - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - required: - - monitors - type: object - cinder: - description: |- - cinder represents a cinder volume attached and mounted on kubelets host machine. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: boolean - secretRef: - description: |- - secretRef is optional: points to a secret object containing parameters used to connect - to OpenStack. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeID: - description: |- - volumeID used to identify the volume in cinder. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - required: - - volumeID - type: object - configMap: - description: configMap represents a configMap - that should populate this volume - properties: - defaultMode: - description: |- - defaultMode is optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether the - ConfigMap or its keys must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - csi: - description: csi (Container Storage Interface) - represents ephemeral storage that is handled - by certain external CSI drivers (Beta feature). - properties: - driver: - description: |- - driver is the name of the CSI driver that handles this volume. - Consult with your admin for the correct name as registered in the cluster. - type: string - fsType: - description: |- - fsType to mount. Ex. "ext4", "xfs", "ntfs". - If not provided, the empty value is passed to the associated CSI driver - which will determine the default filesystem to apply. - type: string - nodePublishSecretRef: - description: |- - nodePublishSecretRef is a reference to the secret object containing - sensitive information to pass to the CSI driver to complete the CSI - NodePublishVolume and NodeUnpublishVolume calls. - This field is optional, and may be empty if no secret is required. 
If the - secret object contains more than one secret, all secret references are passed. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - readOnly: - description: |- - readOnly specifies a read-only configuration for the volume. - Defaults to false (read/write). - type: boolean - volumeAttributes: - additionalProperties: - type: string - description: |- - volumeAttributes stores driver-specific properties that are passed to the CSI - driver. Consult your driver's documentation for supported values. - type: object - required: - - driver - type: object - downwardAPI: - description: downwardAPI represents downward API - about the pod that should populate this volume - properties: - defaultMode: - description: |- - Optional: mode bits to use on created files by default. Must be a - Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: Items is a list of downward API - volume file - items: - description: DownwardAPIVolumeFile represents - information to create the file containing - the pod field - properties: - fieldRef: - description: 'Required: Selects a field - of the pod: only annotations, labels, - name, namespace and uid are supported.' - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". 
- type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path is the - relative path name of the file to - be created. Must not be absolute or - contain the ''..'' path. Must be utf-8 - encoded. The first item of the relative - path must not start with ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - emptyDir: - description: |- - emptyDir represents a temporary directory that shares a pod's lifetime. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - properties: - medium: - description: |- - medium represents what type of storage medium should back this directory. 
- The default is "" which means to use the node's default medium. - Must be an empty string (default) or Memory. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - type: string - sizeLimit: - anyOf: - - type: integer - - type: string - description: |- - sizeLimit is the total amount of local storage required for this EmptyDir volume. - The size limit is also applicable for memory medium. - The maximum usage on memory medium EmptyDir would be the minimum value between - the SizeLimit specified here and the sum of memory limits of all containers in a pod. - The default is nil which means that the limit is undefined. - More info: https://kubernetes. - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - ephemeral: - description: |- - ephemeral represents a volume that is handled by a cluster storage driver. - The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, - and deleted when the pod is removed. - properties: - volumeClaimTemplate: - description: |- - Will be used to create a stand-alone PVC to provision the volume. - The pod in which this EphemeralVolumeSource is embedded will be the - owner of the PVC, i.e. the PVC will be deleted together with the - pod. The name of the PVC will be `-` where - `` is the name from the `PodSpec.Volumes` array - entry. - properties: - metadata: - description: |- - May contain labels and annotations that will be copied into the PVC - when creating it. No other fields are allowed and will be rejected during - validation. 
- properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - The specification for the PersistentVolumeClaim. The entire content is - copied unchanged into the PVC that gets created from this - template. The same fields as in a PersistentVolumeClaim - are also valid here. - properties: - accessModes: - description: |- - accessModes contains the desired access modes the volume should have. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 - items: - type: string - type: array - x-kubernetes-list-type: atomic - dataSource: - description: |- - dataSource field can be used to specify either: - * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) - * An existing PVC (PersistentVolumeClaim) - If the provisioner or an external controller can support the specified data source, - it will create a new volume based on the contents of the specified data source. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - description: |- - dataSourceRef specifies the object from which to populate the volume with data, if a non-empty - volume is desired. This may be any object from a non-empty API group (non - core object) or a PersistentVolumeClaim object. 
- When this field is specified, volume binding will only succeed if the type of - the specified object matches some installed volume populator or dynamic - provisioner. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - namespace: - description: |- - Namespace is the namespace of resource being referenced - Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. - (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. - type: string - required: - - kind - - name - type: object - resources: - description: |- - resources represents the minimum resources the volume should have. - If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements - that are lower than previous value but must still be higher than capacity recorded in the - status field of the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. 
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - selector: - description: selector is a label query - over volumes to consider for binding. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - description: |- - storageClassName is the name of the StorageClass required by the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 - type: string - volumeAttributesClassName: - description: |- - volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. - If specified, the CSI driver will create or update the volume with the attributes defined - in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. - type: string - volumeMode: - description: |- - volumeMode defines what type of volume is required by the claim. - Value of Filesystem is implied when not included in claim spec. - type: string - volumeName: - description: volumeName is the binding - reference to the PersistentVolume - backing this claim. - type: string - type: object - required: - - spec - type: object - type: object - fc: - description: fc represents a Fibre Channel resource - that is attached to a kubelet's host machine - and then exposed to the pod. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - lun: - description: 'lun is Optional: FC target lun - number' - format: int32 - type: integer - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- type: boolean - targetWWNs: - description: 'targetWWNs is Optional: FC target - worldwide names (WWNs)' - items: - type: string - type: array - x-kubernetes-list-type: atomic - wwids: - description: |- - wwids Optional: FC volume world wide identifiers (wwids) - Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - flexVolume: - description: |- - flexVolume represents a generic volume resource that is - provisioned/attached using an exec based plugin. - properties: - driver: - description: driver is the name of the driver - to use for this volume. - type: string - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. - type: string - options: - additionalProperties: - type: string - description: 'options is Optional: this field - holds extra command options if any.' - type: object - readOnly: - description: |- - readOnly is Optional: defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef is Optional: secretRef is reference to the secret object containing - sensitive information to pass to the plugin scripts. This may be - empty if no secret object is specified. If the secret object - contains more than one secret, all secrets are passed to the plugin - scripts. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - required: - - driver - type: object - flocker: - description: flocker represents a Flocker volume - attached to a kubelet's host machine. This depends - on the Flocker control service being running - properties: - datasetName: - description: |- - datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker - should be considered as deprecated - type: string - datasetUUID: - description: datasetUUID is the UUID of the - dataset. This is unique identifier of a - Flocker dataset - type: string - type: object - gcePersistentDisk: - description: |- - gcePersistentDisk represents a GCE Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - properties: - fsType: - description: |- - fsType is filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - format: int32 - type: integer - pdName: - description: |- - pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: boolean - required: - - pdName - type: object - gitRepo: - description: |- - gitRepo represents a git repository at a particular revision. - DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an - EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir - into the Pod's container. - properties: - directory: - description: |- - directory is the target directory name. - Must not contain or start with '..'. If '.' is supplied, the volume directory will be the - git repository. Otherwise, if specified, the volume will contain the git repository in - the subdirectory with the given name. - type: string - repository: - description: repository is the URL - type: string - revision: - description: revision is the commit hash for - the specified revision. - type: string - required: - - repository - type: object - glusterfs: - description: |- - glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/glusterfs/README.md - properties: - endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - path: - description: |- - path is the Glusterfs volume path. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - readOnly: - description: |- - readOnly here will force the Glusterfs volume to be mounted with read-only permissions. - Defaults to false. 
- More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: boolean - required: - - endpoints - - path - type: object - hostPath: - description: |- - hostPath represents a pre-existing file or directory on the host - machine that is directly exposed to the container. This is generally - used for system agents or other privileged things that are allowed - to see the host machine. Most containers will NOT need this. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - properties: - path: - description: |- - path of the directory on the host. - If the path is a symlink, it will follow the link to the real path. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - type: - description: |- - type for HostPath Volume - Defaults to "" - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - required: - - path - type: object - image: - description: |- - image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. - The volume is resolved at pod startup depending on which PullPolicy value is provided: - - - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - - Never: the kubelet never pulls the reference and only uses a local image or artifact. - properties: - pullPolicy: - description: |- - Policy for pulling OCI objects. Possible values are: - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. - type: string - reference: - description: |- - Required: Image or artifact reference to be used. - Behaves in the same way as pod.spec.containers[*].image. 
- Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets. - More info: https://kubernetes. - type: string - type: object - iscsi: - description: |- - iscsi represents an ISCSI Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md - properties: - chapAuthDiscovery: - description: chapAuthDiscovery defines whether - support iSCSI Discovery CHAP authentication - type: boolean - chapAuthSession: - description: chapAuthSession defines whether - support iSCSI Session CHAP authentication - type: boolean - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi - type: string - initiatorName: - description: |- - initiatorName is the custom iSCSI Initiator Name. - If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface - : will be created for the connection. - type: string - iqn: - description: iqn is the target iSCSI Qualified - Name. - type: string - iscsiInterface: - default: default - description: |- - iscsiInterface is the interface Name that uses an iSCSI transport. - Defaults to 'default' (tcp). - type: string - lun: - description: lun represents iSCSI Target Lun - number. - format: int32 - type: integer - portals: - description: |- - portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - items: - type: string - type: array - x-kubernetes-list-type: atomic - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. 
- Defaults to false. - type: boolean - secretRef: - description: secretRef is the CHAP Secret - for iSCSI target and initiator authentication - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - targetPortal: - description: |- - targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - type: string - required: - - iqn - - lun - - targetPortal - type: object - name: - description: |- - name of the volume. - Must be a DNS_LABEL and unique within the pod. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - nfs: - description: |- - nfs represents an NFS mount on the host that shares a pod's lifetime - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - properties: - path: - description: |- - path that is exported by the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - readOnly: - description: |- - readOnly here will force the NFS export to be mounted with read-only permissions. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: boolean - server: - description: |- - server is the hostname or IP address of the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - required: - - path - - server - type: object - persistentVolumeClaim: - description: |- - persistentVolumeClaimVolumeSource represents a reference to a - PersistentVolumeClaim in the same namespace. 
- More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - properties: - claimName: - description: |- - claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - type: string - readOnly: - description: |- - readOnly Will force the ReadOnly setting in VolumeMounts. - Default false. - type: boolean - required: - - claimName - type: object - photonPersistentDisk: - description: photonPersistentDisk represents a - PhotonController persistent disk attached and - mounted on kubelets host machine - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - pdID: - description: pdID is the ID that identifies - Photon Controller persistent disk - type: string - required: - - pdID - type: object - portworxVolume: - description: portworxVolume represents a portworx - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fSType represents the filesystem type to mount - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - volumeID: - description: volumeID uniquely identifies - a Portworx volume - type: string - required: - - volumeID - type: object - projected: - description: projected items for all in one resources - secrets, configmaps, and downward API - properties: - defaultMode: - description: |- - defaultMode are the mode bits used to set permissions on created files by default. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Directories within the path are not affected by this setting. - format: int32 - type: integer - sources: - description: |- - sources is the list of volume projections. Each entry in this list - handles one source. - items: - description: |- - Projection that may be projected along with other supported volume types. - Exactly one of these fields must be set. - properties: - clusterTrustBundle: - description: |- - ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field - of ClusterTrustBundle objects in an auto-updating file. - - Alpha, gated by the ClusterTrustBundleProjection feature gate. - - ClusterTrustBundle objects can either be selected by name, or by the - combination of signer name and a label selector. - properties: - labelSelector: - description: |- - Select all ClusterTrustBundles that match this label selector. Only has - effect if signerName is set. Mutually-exclusive with name. If unset, - interpreted as "match nothing". If set but empty, interpreted as "match - everything". - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. 
This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - name: - description: |- - Select a single ClusterTrustBundle by object name. Mutually-exclusive - with signerName and labelSelector. - type: string - optional: - description: |- - If true, don't block pod startup if the referenced ClusterTrustBundle(s) - aren't available. If using name, then the named ClusterTrustBundle is - allowed not to exist. If using signerName, then the combination of - signerName and labelSelector is allowed to match zero - ClusterTrustBundles. - type: boolean - path: - description: Relative path from - the volume root to write the bundle. - type: string - signerName: - description: |- - Select all ClusterTrustBundles that match this signer name. - Mutually-exclusive with name. The contents of all selected - ClusterTrustBundles will be unified and deduplicated. - type: string - required: - - path - type: object - configMap: - description: configMap information about - the configMap data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key - to a path within a volume. 
- properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether - the ConfigMap or its keys must - be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - downwardAPI: - description: downwardAPI information - about the downwardAPI data to project - properties: - items: - description: Items is a list of - DownwardAPIVolume file - items: - description: DownwardAPIVolumeFile - represents information to create - the file containing the pod - field - properties: - fieldRef: - description: 'Required: Selects - a field of the pod: only - annotations, labels, name, - namespace and uid are supported.' - properties: - apiVersion: - description: Version of - the schema the FieldPath - is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the - field to select in the - specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path - is the relative path name - of the file to be created. - Must not be absolute or - contain the ''..'' path. - Must be utf-8 encoded. The - first item of the relative - path must not start with - ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container - name: required for volumes, - optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies - the output format of - the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: - resource to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - secret: - description: secret information about - the secret data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. 
- items: - description: Maps a string key - to a path within a volume. - properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional field specify - whether the Secret or its key - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - serviceAccountToken: - description: serviceAccountToken is - information about the serviceAccountToken - data to project - properties: - audience: - description: |- - audience is the intended audience of the token. A recipient of a token - must identify itself with an identifier specified in the audience of the - token, and otherwise should reject the token. The audience defaults to the - identifier of the apiserver. - type: string - expirationSeconds: - description: |- - expirationSeconds is the requested duration of validity of the service - account token. 
As the token approaches expiration, the kubelet volume - plugin will proactively rotate the service account token. The kubelet will - start trying to rotate the token if the token is older than 80 percent of - its time to live or if the token is older than 24 hours.Defaults to 1 hour - and must be at least 10 minutes. - format: int64 - type: integer - path: - description: |- - path is the path relative to the mount point of the file to project the - token into. - type: string - required: - - path - type: object - type: object - type: array - x-kubernetes-list-type: atomic - type: object - quobyte: - description: quobyte represents a Quobyte mount - on the host that shares a pod's lifetime - properties: - group: - description: |- - group to map volume access to - Default is no group - type: string - readOnly: - description: |- - readOnly here will force the Quobyte volume to be mounted with read-only permissions. - Defaults to false. - type: boolean - registry: - description: |- - registry represents a single or multiple Quobyte Registry services - specified as a string as host:port pair (multiple entries are separated with commas) - which acts as the central registry for volumes - type: string - tenant: - description: |- - tenant owning the given Quobyte volume in the Backend - Used with dynamically provisioned Quobyte volumes, value is set by the plugin - type: string - user: - description: |- - user to map volume access to - Defaults to serivceaccount user - type: string - volume: - description: volume is a string that references - an already created Quobyte volume by name. - type: string - required: - - registry - - volume - type: object - rbd: - description: |- - rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/rbd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. 
- Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd - type: string - image: - description: |- - image is the rados image name. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - keyring: - default: /etc/ceph/keyring - description: |- - keyring is the path to key ring for RBDUser. - Default is /etc/ceph/keyring. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - monitors: - description: |- - monitors is a collection of Ceph monitors. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - pool: - default: rbd - description: |- - pool is the rados pool name. - Default is rbd. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: boolean - secretRef: - description: |- - secretRef is name of the authentication secret for RBDUser. If provided - overrides keyring. - Default is nil. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - default: admin - description: |- - user is the rados user name. - Default is admin. 
- More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - required: - - image - - monitors - type: object - scaleIO: - description: scaleIO represents a ScaleIO persistent - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - default: xfs - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". - Default is "xfs". - type: string - gateway: - description: gateway is the host address of - the ScaleIO API Gateway. - type: string - protectionDomain: - description: protectionDomain is the name - of the ScaleIO Protection Domain for the - configured storage. - type: string - readOnly: - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef references to the secret for ScaleIO user and other - sensitive information. If this is not provided, Login operation will fail. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - sslEnabled: - description: sslEnabled Flag enable/disable - SSL communication with Gateway, default - false - type: boolean - storageMode: - default: ThinProvisioned - description: |- - storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. - Default is ThinProvisioned. - type: string - storagePool: - description: storagePool is the ScaleIO Storage - Pool associated with the protection domain. 
- type: string - system: - description: system is the name of the storage - system as configured in ScaleIO. - type: string - volumeName: - description: |- - volumeName is the name of a volume already created in the ScaleIO system - that is associated with this volume source. - type: string - required: - - gateway - - secretRef - - system - type: object - secret: - description: |- - secret represents a secret that should populate this volume. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - properties: - defaultMode: - description: |- - defaultMode is Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values - for mode bits. Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items If unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. 
- type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - optional: - description: optional field specify whether - the Secret or its keys must be defined - type: boolean - secretName: - description: |- - secretName is the name of the secret in the pod's namespace to use. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - type: string - type: object - storageos: - description: storageOS represents a StorageOS - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef specifies the secret to use for obtaining the StorageOS API - credentials. If not specified, default values will be attempted. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeName: - description: |- - volumeName is the human-readable name of the StorageOS volume. Volume - names are only unique within a namespace. - type: string - volumeNamespace: - description: |- - volumeNamespace specifies the scope of the volume within StorageOS. If no - namespace is specified then the Pod's namespace will be used. This allows the - Kubernetes name scoping to be mirrored within StorageOS for tighter integration. 
- Set VolumeName to any name to override the default behaviour. - Set to "default" if you are not using namespaces within StorageOS. - type: string - type: object - vsphereVolume: - description: vsphereVolume represents a vSphere - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fsType is filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - storagePolicyID: - description: storagePolicyID is the storage - Policy Based Management (SPBM) profile ID - associated with the StoragePolicyName. - type: string - storagePolicyName: - description: storagePolicyName is the storage - Policy Based Management (SPBM) profile name. - type: string - volumePath: - description: volumePath is the path that identifies - vSphere volume vmdk - type: string - required: - - volumePath - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - required: - - containers - type: object - type: object - type: object - description: |- - A map of PaddleReplicaType (type) to ReplicaSpec (value). Specifies the Paddle cluster configuration. - For example, - { - "Master": PaddleReplicaSpec, - "Worker": PaddleReplicaSpec, - } - type: object - runPolicy: - description: |- - RunPolicy encapsulates various runtime policies of the distributed training - job, for example how to clean up resources and how long the job can stay - active. - properties: - activeDeadlineSeconds: - description: |- - Specifies the duration in seconds relative to the startTime that the job may be active - before the system tries to terminate it; value must be positive integer. - format: int64 - type: integer - backoffLimit: - description: Optional number of retries before marking this job - failed. 
- format: int32 - type: integer - cleanPodPolicy: - description: |- - CleanPodPolicy defines the policy to kill pods after the job completes. - Default to None. - type: string - managedBy: - description: |- - ManagedBy is used to indicate the controller or entity that manages a job. - The value must be either an empty, 'kubeflow.org/training-operator' or - 'kueue.x-k8s.io/multikueue'. - The training-operator reconciles a job which doesn't have this - field at all or the field value is the reserved string - 'kubeflow.org/training-operator', but delegates reconciling the job - with 'kueue.x-k8s. - type: string - schedulingPolicy: - description: SchedulingPolicy defines the policy related to scheduling, - e.g. gang-scheduling - properties: - minAvailable: - format: int32 - type: integer - minResources: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - priorityClass: - type: string - queue: - type: string - x-kubernetes-validations: - - message: spec.runPolicy.schedulingPolicy.queue is immutable - rule: self == oldSelf - scheduleTimeoutSeconds: - format: int32 - type: integer - type: object - suspend: - default: false - description: |- - suspend specifies whether the Job controller should create Pods or not. - If a Job is created with suspend set to true, no Pods are created by - the Job controller. If a Job is suspended after creation (i.e. the - flag goes from false to true), the Job controller will delete all - active Pods and PodGroups associated with this Job. - Users must design their workload to gracefully handle this. - type: boolean - ttlSecondsAfterFinished: - description: |- - TTLSecondsAfterFinished is the TTL to clean up jobs. - It may take extra ReconcilePeriod seconds for the cleanup, since - reconcile gets called periodically. - Default to infinite. 
- format: int32 - type: integer - type: object - required: - - paddleReplicaSpecs - type: object - status: - description: |- - Most recently observed status of the PaddleJob. - Read-only (modified by the system). - properties: - completionTime: - description: |- - Represents time when the job was completed. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - conditions: - description: Conditions is an array of current observed job conditions. - items: - description: JobCondition describes the state of the job at a certain - point. - properties: - lastTransitionTime: - description: Last time the condition transitioned from one status - to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human readable message indicating details about - the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Type of job condition. - type: string - required: - - status - - type - type: object - type: array - lastReconcileTime: - description: |- - Represents last time when the job was reconciled. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - replicaStatuses: - additionalProperties: - description: ReplicaStatus represents the current observed state - of the replica. - properties: - active: - description: The number of actively running pods. - format: int32 - type: integer - failed: - description: The number of pods which reached phase Failed. 
- format: int32 - type: integer - labelSelector: - description: 'Deprecated: Use Selector instead' - properties: - matchExpressions: - description: matchExpressions is a list of label selector - requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - selector: - description: |- - A Selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty Selector matches all objects. A null - Selector matches no objects. - type: string - succeeded: - description: The number of pods which reached phase Succeeded. - format: int32 - type: integer - type: object - description: |- - ReplicaStatuses is map of ReplicaType and ReplicaStatus, - specifies the status of each replica. 
- type: object - startTime: - description: |- - Represents time when the job was acknowledged by the job controller. - It is not guaranteed to be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - type: object - type: object - served: true - storage: true - subresources: - scale: - labelSelectorPath: .status.replicaStatuses.Worker.selector - specReplicasPath: .spec.paddleReplicaSpecs.Worker.replicas - statusReplicasPath: .status.replicaStatuses.Worker.active - status: {} diff --git a/manifests/base/crds/kubeflow.org_pytorchjobs.yaml b/manifests/base/crds/kubeflow.org_pytorchjobs.yaml deleted file mode 100644 index 1f8c8bc873..0000000000 --- a/manifests/base/crds/kubeflow.org_pytorchjobs.yaml +++ /dev/null @@ -1,8431 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.5 - name: pytorchjobs.kubeflow.org -spec: - group: kubeflow.org - names: - kind: PyTorchJob - listKind: PyTorchJobList - plural: pytorchjobs - singular: pytorchjob - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .status.conditions[-1:].type - name: State - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1 - schema: - openAPIV3Schema: - description: PyTorchJob Represents a PyTorchJob resource. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. 
- In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of the desired state of the PyTorchJob. - properties: - elasticPolicy: - properties: - maxReplicas: - description: upper limit for the number of pods that can be set - by the autoscaler; cannot be smaller than MinReplicas, defaults - to null. - format: int32 - type: integer - maxRestarts: - format: int32 - type: integer - metrics: - description: |- - Metrics contains the specifications which are used to calculate the - desired replica count (the maximum replica count across all metrics will - be used). The desired replica count is calculated with multiplying the - ratio between the target value and the current value by the current - number of pods. Ergo, metrics used must decrease as the pod count is - increased, and vice-versa. - items: - description: |- - MetricSpec specifies how to scale based on a single metric - (only `type` and one other matching field should be set at once). - properties: - containerResource: - description: |- - containerResource refers to a resource metric (such as those specified in - requests and limits) known to Kubernetes describing a single container in - each pod of the current scale target (e.g. CPU or memory). Such metrics are - built in to Kubernetes, and have special scaling options on top of those - available to normal per-pod metrics using the "pods" source. - properties: - container: - description: container is the name of the container - in the pods of the scaling target - type: string - name: - description: name is the name of the resource in question. 
- type: string - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - container - - name - - target - type: object - external: - description: |- - external refers to a global metric that is not associated - with any Kubernetes object. It allows autoscaling based on information - coming from components running outside of cluster - (for example length of queue in cloud messaging service, or - QPS from loadbalancer running outside of cluster). 
- properties: - metric: - description: metric identifies the target metric by - name and selector - properties: - name: - description: name is the name of the given metric - type: string - selector: - description: |- - selector is the string-encoded form of a standard kubernetes label selector for the given metric - When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. - When unset, just the metricName will be used to gather metrics. - properties: - matchExpressions: - description: matchExpressions is a list of label - selector requirements. The requirements are - ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. 
- type: object - type: object - x-kubernetes-map-type: atomic - required: - - name - type: object - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - metric - - target - type: object - object: - description: |- - object refers to a metric describing a single kubernetes object - (for example, hits-per-second on an Ingress object). 
- properties: - describedObject: - description: describedObject specifies the descriptions - of a object,such as kind,name apiVersion - properties: - apiVersion: - description: apiVersion is the API version of the - referent - type: string - kind: - description: 'kind is the kind of the referent; - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' - type: string - name: - description: 'name is the name of the referent; - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' - type: string - required: - - kind - - name - type: object - metric: - description: metric identifies the target metric by - name and selector - properties: - name: - description: name is the name of the given metric - type: string - selector: - description: |- - selector is the string-encoded form of a standard kubernetes label selector for the given metric - When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. - When unset, just the metricName will be used to gather metrics. - properties: - matchExpressions: - description: matchExpressions is a list of label - selector requirements. The requirements are - ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - required: - - name - type: object - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). 
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - describedObject - - metric - - target - type: object - pods: - description: |- - pods refers to a metric describing each pod in the current scale target - (for example, transactions-processed-per-second). The values will be - averaged together before being compared to the target value. - properties: - metric: - description: metric identifies the target metric by - name and selector - properties: - name: - description: name is the name of the given metric - type: string - selector: - description: |- - selector is the string-encoded form of a standard kubernetes label selector for the given metric - When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. - When unset, just the metricName will be used to gather metrics. - properties: - matchExpressions: - description: matchExpressions is a list of label - selector requirements. The requirements are - ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - required: - - name - type: object - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). 
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - metric - - target - type: object - resource: - description: |- - resource refers to a resource metric (such as those specified in - requests and limits) known to Kubernetes describing each pod in the - current scale target (e.g. CPU or memory). Such metrics are built in to - Kubernetes, and have special scaling options on top of those available - to normal per-pod metrics using the "pods" source. - properties: - name: - description: name is the name of the resource in question. - type: string - target: - description: target specifies the target value for the - given metric - properties: - averageUtilization: - description: |- - averageUtilization is the target value of the average of the - resource metric across all relevant pods, represented as a percentage of - the requested value of the resource for the pods. - Currently only valid for Resource metric source type - format: int32 - type: integer - averageValue: - anyOf: - - type: integer - - type: string - description: |- - averageValue is the target value of the average of the - metric across all relevant pods (as a quantity) - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: - description: type represents whether the metric - type is Utilization, Value, or AverageValue - type: string - value: - anyOf: - - type: integer - - type: string - description: value is the target value of the metric - (as a quantity). 
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - type - type: object - required: - - name - - target - type: object - type: - description: |- - type is the type of metric source. It should be one of "ContainerResource", "External", - "Object", "Pods" or "Resource", each mapping to a matching field in the object. - Note: "ContainerResource" type is available on when the feature-gate - HPAContainerMetrics is enabled - type: string - required: - - type - type: object - type: array - minReplicas: - description: |- - minReplicas is the lower limit for the number of replicas to which the training job - can scale down. It defaults to null. - format: int32 - type: integer - nProcPerNode: - description: |- - Number of workers per node; supported values: [auto, cpu, gpu, int]. - Deprecated: This API is deprecated in v1.7+ - Use .spec.nprocPerNode instead. - format: int32 - type: integer - rdzvBackend: - type: string - rdzvConf: - description: RDZVConf contains additional rendezvous configuration - (=,=,...). - items: - properties: - key: - type: string - value: - type: string - type: object - type: array - rdzvHost: - type: string - rdzvId: - type: string - rdzvPort: - format: int32 - type: integer - standalone: - description: |- - Start a local standalone rendezvous backend that is represented by a C10d TCP store - on port 29400. Useful when launching single-node, multi-worker job. If specified - --rdzv_backend, --rdzv_endpoint, --rdzv_id are auto-assigned; any explicitly set values - are ignored. - type: boolean - type: object - nprocPerNode: - description: |- - Number of workers per node; supported values: [auto, cpu, gpu, int]. - For more, https://github.com/pytorch/pytorch/blob/26f7f470df64d90e092081e39507e4ac751f55d6/torch/distributed/run.py#L629-L658. - Defaults to auto. 
- type: string - pytorchReplicaSpecs: - additionalProperties: - description: ReplicaSpec is a description of the replica - properties: - replicas: - description: |- - Replicas is the desired number of replicas of the given template. - If unspecified, defaults to 1. - format: int32 - type: integer - restartPolicy: - description: |- - Restart policy for all replicas within the job. - One of Always, OnFailure, Never and ExitCode. - Default to Never. - type: string - template: - description: |- - Template is the object that describes the pod that - will be created for this replica. RestartPolicy in PodTemplateSpec - will be overide by RestartPolicy in ReplicaSpec - properties: - metadata: - description: |- - Standard object's metadata. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - Specification of the desired behavior of the pod. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status - properties: - activeDeadlineSeconds: - description: |- - Optional duration in seconds the pod may be active on the node relative to - StartTime before the system will actively try to mark it failed and kill associated containers. - Value must be a positive integer. - format: int64 - type: integer - affinity: - description: If specified, the pod's scheduling constraints - properties: - nodeAffinity: - description: Describes node affinity scheduling - rules for the pod. 
- properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: |- - An empty preferred scheduling term matches all objects with implicit weight 0 - (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). - properties: - preference: - description: A node selector term, associated - with the corresponding weight. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. 
- items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - weight: - description: Weight associated with matching - the corresponding nodeSelectorTerm, - in the range 1-100. - format: int32 - type: integer - required: - - preference - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to an update), the system - may or may not try to eventually evict the pod from its node. - properties: - nodeSelectorTerms: - description: Required. A list of node selector - terms. The terms are ORed. - items: - description: |- - A null or empty node selector term matches no objects. The requirements of - them are ANDed. - The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. 
- properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-type: atomic - required: - - nodeSelectorTerms - type: object - x-kubernetes-map-type: atomic - type: object - podAffinity: - description: Describes pod affinity scheduling rules - (e.g. co-locate this pod in the same node, zone, - etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. 
- type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. 
- The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. 
- items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. 
The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - podAntiAffinity: - description: Describes pod anti-affinity scheduling - rules (e.g. avoid putting this pod in the same - node, zone, etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the anti-affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. 
- properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the anti-affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the anti-affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. 
due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. 
- type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. 
- type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - type: object - automountServiceAccountToken: - description: AutomountServiceAccountToken indicates - whether a service account token should be automatically - mounted. 
- type: boolean - containers: - description: |- - List of containers belonging to the pod. - Containers cannot currently be added or removed. - There must be at least one container in a Pod. - Cannot be updated. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". 
- type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. 
- More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. 
- format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. 
- format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. 
- format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. 
- For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. 
- x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. 
- type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - dnsConfig: - description: |- - Specifies the DNS parameters of a pod. - Parameters specified here will be merged to the generated DNS - configuration based on DNSPolicy. - properties: - nameservers: - description: |- - A list of DNS name server IP addresses. - This will be appended to the base nameservers generated from DNSPolicy. - Duplicated nameservers will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - options: - description: |- - A list of DNS resolver options. - This will be merged with the base options generated from DNSPolicy. - Duplicated entries will be removed. Resolution options given in Options - will override those that appear in the base DNSPolicy. 
- items: - description: PodDNSConfigOption defines DNS resolver - options of a pod. - properties: - name: - description: Required. - type: string - value: - type: string - type: object - type: array - x-kubernetes-list-type: atomic - searches: - description: |- - A list of DNS search domains for host-name lookup. - This will be appended to the base search paths generated from DNSPolicy. - Duplicated search paths will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - dnsPolicy: - description: |- - Set DNS policy for the pod. - Defaults to "ClusterFirst". - Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. - DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. - To have DNS options set along with hostNetwork, you have to specify DNS policy - explicitly to 'ClusterFirstWithHostNet'. - type: string - enableServiceLinks: - description: |- - EnableServiceLinks indicates whether information about services should be injected into pod's - environment variables, matching the syntax of Docker links. - Optional: Defaults to true. - type: boolean - ephemeralContainers: - description: |- - List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing - pod to perform user-initiated actions such as debugging. This list cannot be specified when - creating a pod, and it cannot be modified by updating the pod spec. In order to add an - ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. - items: - description: |- - An EphemeralContainer is a temporary container that you may add to an existing Pod for - user-initiated activities such as debugging. Ephemeral containers have no resource or - scheduling guarantees, and they will not be restarted when they exit or when a Pod is - removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the - Pod to exceed its resource allocation. 
- properties: - args: - description: |- - Arguments to the entrypoint. - The image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. 
- type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: Lifecycle is not allowed for ephemeral - containers. 
- properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. 
- Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. 
- properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. 
- properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the ephemeral container specified as a DNS_LABEL. - This name must be unique among all containers, init containers and ephemeral containers. - type: string - ports: - description: Ports are not allowed for ephemeral - containers. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. 
Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. 
You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources - already allocated to the pod. - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. 
- - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - Restart policy for the container to manage the restart behavior of each - container within a pod. 
- This may only be set for init containers. You cannot set this field on - ephemeral containers. - type: string - securityContext: - description: |- - Optional: SecurityContext defines the security options the ephemeral container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - targetContainerName: - description: |- - If set, the name of the container from PodSpec that this ephemeral container targets. - The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. - If not set then the ephemeral container uses the namespaces configured in the Pod spec. - - The container runtime must implement support for this feature. - type: string - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. 
- type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). 
- type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. - - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - hostAliases: - description: |- - HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts - file if specified. - items: - description: |- - HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the - pod's hosts file. 
- properties: - hostnames: - description: Hostnames for the above IP address. - items: - type: string - type: array - x-kubernetes-list-type: atomic - ip: - description: IP address of the host file entry. - type: string - required: - - ip - type: object - type: array - x-kubernetes-list-map-keys: - - ip - x-kubernetes-list-type: map - hostIPC: - description: |- - Use the host's ipc namespace. - Optional: Default to false. - type: boolean - hostNetwork: - description: |- - Host networking requested for this pod. Use the host's network namespace. - If this option is set, the ports that will be used must be specified. - Default to false. - type: boolean - hostPID: - description: |- - Use the host's pid namespace. - Optional: Default to false. - type: boolean - hostUsers: - description: |- - Use the host's user namespace. - Optional: Default to true. - If set to true or not present, the pod will be run in the host user namespace, useful - for when the pod needs a feature only available to the host user namespace, such as - loading a kernel module with CAP_SYS_MODULE. - When set to false, a new userns is created for the pod. - type: boolean - hostname: - description: |- - Specifies the hostname of the Pod - If not specified, the pod's hostname will be set to a system-defined value. - type: string - imagePullSecrets: - description: |- - ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. - If specified, these secrets will be passed to individual puller implementations for them to use. - More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod - items: - description: |- - LocalObjectReference contains enough information to let you locate the - referenced object inside the same namespace. - properties: - name: - default: "" - description: |- - Name of the referent. 
- This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - initContainers: - description: |- - List of initialization containers belonging to the pod. - Init containers are executed in order prior to containers being started. If any - init container fails, the pod is considered to have failed and is handled according - to its restartPolicy. The name for an init container or normal container must be - unique among all containers. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. 
- items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. 
All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. 
- type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. 
The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. 
- type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. 
Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. 
Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. - For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. 
- type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - nodeName: - description: |- - NodeName indicates in which node this pod is scheduled. - If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName. - Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod. - This field should not be used to express a desire for the pod to be scheduled on a specific node. - https://kubernetes. - type: string - nodeSelector: - additionalProperties: - type: string - description: |- - NodeSelector is a selector which must be true for the pod to fit on a node. - Selector which must match a node's labels for the pod to be scheduled on that node. 
- More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ - type: object - x-kubernetes-map-type: atomic - os: - description: |- - Specifies the OS of the containers in the pod. - Some pod and container fields are restricted if this is set. - - If the OS field is set to linux, the following fields must be unset: - -securityContext.windowsOptions - - If the OS field is set to windows, following fields must be unset: - - spec.hostPID - - spec.hostIPC - - spec.hostUsers - - spec.securityContext.appArmorProfile - - spec.securityContext. - properties: - name: - description: |- - Name is the name of the operating system. The currently supported values are linux and windows. - Additional value may be defined in future and can be one of: - https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration - Clients should expect to handle additional values and treat unrecognized values in this field as os: null - type: string - required: - - name - type: object - overhead: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. - This field will be autopopulated at admission time by the RuntimeClass admission controller. If - the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. - The RuntimeClass admission controller will reject Pod create requests which have the overhead already - set. - type: object - preemptionPolicy: - description: |- - PreemptionPolicy is the Policy for preempting pods with lower priority. - One of Never, PreemptLowerPriority. - Defaults to PreemptLowerPriority if unset. - type: string - priority: - description: |- - The priority value. 
Various system components use this field to find the - priority of the pod. When Priority Admission Controller is enabled, it - prevents users from setting this field. The admission controller populates - this field from PriorityClassName. - The higher the value, the higher the priority. - format: int32 - type: integer - priorityClassName: - description: |- - If specified, indicates the pod's priority. "system-node-critical" and - "system-cluster-critical" are two special keywords which indicate the - highest priorities with the former being the highest priority. Any other - name must be defined by creating a PriorityClass object with that name. - If not specified, the pod priority will be default or zero if there is no - default. - type: string - readinessGates: - description: |- - If specified, all readiness gates will be evaluated for pod readiness. - A pod is ready when all its containers are ready AND - all conditions specified in the readiness gates have status equal to "True" - More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates - items: - description: PodReadinessGate contains the reference - to a pod condition - properties: - conditionType: - description: ConditionType refers to a condition - in the pod's condition list with matching type. - type: string - required: - - conditionType - type: object - type: array - x-kubernetes-list-type: atomic - resourceClaims: - description: |- - ResourceClaims defines which ResourceClaims must be allocated - and reserved before the Pod is allowed to start. The resources - will be made available to those containers which consume them - by name. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. - items: - description: |- - PodResourceClaim references exactly one ResourceClaim, either directly - or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim - for the pod. 
- - It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. - Containers that need access to the ResourceClaim reference it with this name. - properties: - name: - description: |- - Name uniquely identifies this resource claim inside the pod. - This must be a DNS_LABEL. - type: string - resourceClaimName: - description: |- - ResourceClaimName is the name of a ResourceClaim object in the same - namespace as this pod. - - Exactly one of ResourceClaimName and ResourceClaimTemplateName must - be set. - type: string - resourceClaimTemplateName: - description: |- - ResourceClaimTemplateName is the name of a ResourceClaimTemplate - object in the same namespace as this pod. - - The template will be used to create a new ResourceClaim, which will - be bound to this pod. When this pod is deleted, the ResourceClaim - will also be deleted. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - restartPolicy: - description: |- - Restart policy for all containers within the pod. - One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. - Default to Always. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy - type: string - runtimeClassName: - description: |- - RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used - to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. - If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an - empty definition that uses the default runtime handler. - More info: https://git.k8s. - type: string - schedulerName: - description: |- - If specified, the pod will be dispatched by specified scheduler. - If not specified, the pod will be dispatched by default scheduler. 
- type: string - schedulingGates: - description: |- - SchedulingGates is an opaque list of values that if specified will block scheduling the pod. - If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the - scheduler will not attempt to schedule the pod. - - SchedulingGates can only be set at pod creation time, and be removed only afterwards. - items: - description: PodSchedulingGate is associated to a - Pod to guard its scheduling. - properties: - name: - description: |- - Name of the scheduling gate. - Each scheduling gate must have a unique name field. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - securityContext: - description: |- - SecurityContext holds pod-level security attributes and common container settings. - Optional: Defaults to empty. See type description for default values of each field. - properties: - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - fsGroup: - description: |- - A special supplemental group that applies to all containers in a pod. - Some volume types allow the Kubelet to change the ownership of that volume - to be owned by the pod: - - 1. The owning GID will be the FSGroup - 2. 
The setgid bit is set (new files created in the volume will be owned by FSGroup) - 3. - format: int64 - type: integer - fsGroupChangePolicy: - description: |- - fsGroupChangePolicy defines behavior of changing ownership and permission of the volume - before being exposed inside Pod. This field will only apply to - volume types which support fsGroup based ownership(and permissions). - It will have no effect on ephemeral volume types such as: secret, configmaps - and emptydir. - Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. - type: string - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in SecurityContext. - type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to all containers. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in SecurityContext. 
If set in - both SecurityContext and PodSecurityContext, the value specified in SecurityContext - takes precedence for that container. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that - applies to the container. - type: string - role: - description: Role is a SELinux role label that - applies to the container. - type: string - type: - description: Type is a SELinux type label that - applies to the container. - type: string - user: - description: User is a SELinux user label that - applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. - Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - supplementalGroups: - description: |- - A list of groups applied to the first process run in each container, in - addition to the container's primary GID and fsGroup (if specified). If - the SupplementalGroupsPolicy feature is enabled, the - supplementalGroupsPolicy field determines whether these are in addition - to or instead of any group memberships defined in the container image. 
- items: - format: int64 - type: integer - type: array - x-kubernetes-list-type: atomic - supplementalGroupsPolicy: - description: |- - Defines how supplemental groups of the first container processes are calculated. - Valid values are "Merge" and "Strict". If not specified, "Merge" is used. - (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled - and the container runtime must implement support for this feature. - Note that this field cannot be set when spec.os.name is windows. - type: string - sysctls: - description: |- - Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported - sysctls (by the container runtime) might fail to launch. - Note that this field cannot be set when spec.os.name is windows. - items: - description: Sysctl defines a kernel parameter - to be set - properties: - name: - description: Name of a property to set - type: string - value: - description: Value of a property to set - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options within a container's SecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. 
- All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - serviceAccount: - description: |- - DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. - Deprecated: Use serviceAccountName instead. - type: string - serviceAccountName: - description: |- - ServiceAccountName is the name of the ServiceAccount to use to run this pod. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ - type: string - setHostnameAsFQDN: - description: |- - If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). - In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). - type: boolean - shareProcessNamespace: - description: |- - Share a single process namespace between all of the containers in a pod. - When this is set containers will be able to view and signal processes from other containers - in the same pod, and the first process in each container will not be assigned PID 1. - HostPID and ShareProcessNamespace cannot both be set. - Optional: Default to false. - type: boolean - subdomain: - description: |- - If specified, the fully qualified Pod hostname will be "...svc.". - If not specified, the pod will not have a domainname at all. 
- type: string - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - If this value is nil, the default grace period will be used instead. - format: int64 - type: integer - tolerations: - description: If specified, the pod's tolerations. - items: - description: |- - The pod this Toleration is attached to tolerates any taint that matches - the triple using the matching operator . - properties: - effect: - description: |- - Effect indicates the taint effect to match. Empty means match all taint effects. - When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: |- - Key is the taint key that the toleration applies to. Empty means match all taint keys. - If the key is empty, operator must be Exists; this combination means to match all values and all keys. - type: string - operator: - description: |- - Operator represents a key's relationship to the value. - Valid operators are Exists and Equal. Defaults to Equal. - Exists is equivalent to wildcard for value, so that a pod can - tolerate all taints of a particular category. - type: string - tolerationSeconds: - description: |- - TolerationSeconds represents the period of time the toleration (which must be - of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, - it is not set, which means tolerate the taint forever (do not evict). Zero and - negative values will be treated as 0 (evict immediately) by the system. - format: int64 - type: integer - value: - description: |- - Value is the taint value the toleration matches to. - If the operator is Exists, the value should be empty, otherwise just a regular string. 
- type: string - type: object - type: array - x-kubernetes-list-type: atomic - topologySpreadConstraints: - description: |- - TopologySpreadConstraints describes how a group of pods ought to spread across topology - domains. Scheduler will schedule pods in a way which abides by the constraints. - All topologySpreadConstraints are ANDed. - items: - description: TopologySpreadConstraint specifies how - to spread matching pods among the given topology. - properties: - labelSelector: - description: |- - LabelSelector is used to find matching pods. - Pods that match this label selector are counted to determine the number of pods - in their corresponding topology domain. - properties: - matchExpressions: - description: matchExpressions is a list of - label selector requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". 
The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select the pods over which - spreading will be calculated. The keys are used to lookup values from the - incoming pod labels, those key-value labels are ANDed with labelSelector - to select the group of existing pods over which spreading will be calculated - for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. - items: - type: string - type: array - x-kubernetes-list-type: atomic - maxSkew: - description: |- - MaxSkew describes the degree to which pods may be unevenly distributed. - When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference - between the number of matching pods in the target topology and the global minimum. - The global minimum is the minimum number of matching pods in an eligible domain - or zero if the number of eligible domains is less than MinDomains. - format: int32 - type: integer - minDomains: - description: |- - MinDomains indicates a minimum number of eligible domains. - When the number of eligible domains with matching topology keys is less than minDomains, - Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. - And when the number of eligible domains with matching topology keys equals or greater than minDomains, - this value has no effect on scheduling. - format: int32 - type: integer - nodeAffinityPolicy: - description: |- - NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector - when calculating pod topology spread skew. Options are: - - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. - - If this value is nil, the behavior is equivalent to the Honor policy. 
- type: string - nodeTaintsPolicy: - description: |- - NodeTaintsPolicy indicates how we will treat node taints when calculating - pod topology spread skew. Options are: - - Honor: nodes without taints, along with tainted nodes for which the incoming pod - has a toleration, are included. - - Ignore: node taints are ignored. All nodes are included. - - If this value is nil, the behavior is equivalent to the Ignore policy. - type: string - topologyKey: - description: |- - TopologyKey is the key of node labels. Nodes that have a label with this key - and identical values are considered to be in the same topology. - We consider each as a "bucket", and try to put balanced number - of pods into each bucket. - We define a domain as a particular instance of a topology. - type: string - whenUnsatisfiable: - description: |- - WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy - the spread constraint. - - DoNotSchedule (default) tells the scheduler not to schedule it. - - ScheduleAnyway tells the scheduler to schedule the pod in any location, - but giving higher precedence to topologies that would help reduce the - skew. - type: string - required: - - maxSkew - - topologyKey - - whenUnsatisfiable - type: object - type: array - x-kubernetes-list-map-keys: - - topologyKey - - whenUnsatisfiable - x-kubernetes-list-type: map - volumes: - description: |- - List of volumes that can be mounted by containers belonging to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes - items: - description: Volume represents a named volume in a - pod that may be accessed by any container in the - pod. - properties: - awsElasticBlockStore: - description: |- - awsElasticBlockStore represents an AWS Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - format: int32 - type: integer - readOnly: - description: |- - readOnly value true will force the readOnly setting in VolumeMounts. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: boolean - volumeID: - description: |- - volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - required: - - volumeID - type: object - azureDisk: - description: azureDisk represents an Azure Data - Disk mount on the host and bind mount to the - pod. - properties: - cachingMode: - description: 'cachingMode is the Host Caching - mode: None, Read Only, Read Write.' - type: string - diskName: - description: diskName is the Name of the data - disk in the blob storage - type: string - diskURI: - description: diskURI is the URI of data disk - in the blob storage - type: string - fsType: - default: ext4 - description: |- - fsType is Filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
- type: string - kind: - description: 'kind expected values are Shared: - multiple blob disks per storage account Dedicated: - single blob disk per storage account Managed: - azure managed data disk (only in managed - availability set). defaults to shared' - type: string - readOnly: - default: false - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - required: - - diskName - - diskURI - type: object - azureFile: - description: azureFile represents an Azure File - Service mount on the host and bind mount to - the pod. - properties: - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretName: - description: secretName is the name of secret - that contains Azure Storage Account Name - and Key - type: string - shareName: - description: shareName is the azure share - Name - type: string - required: - - secretName - - shareName - type: object - cephfs: - description: cephFS represents a Ceph FS mount - on the host that shares a pod's lifetime - properties: - monitors: - description: |- - monitors is Required: Monitors is a collection of Ceph monitors - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - path: - description: 'path is Optional: Used as the - mounted root, rather than the full Ceph - tree, default is /' - type: string - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: boolean - secretFile: - description: |- - secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - secretRef: - description: |- - secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - description: |- - user is optional: User is the rados user name, default is admin - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - required: - - monitors - type: object - cinder: - description: |- - cinder represents a cinder volume attached and mounted on kubelets host machine. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: boolean - secretRef: - description: |- - secretRef is optional: points to a secret object containing parameters used to connect - to OpenStack. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeID: - description: |- - volumeID used to identify the volume in cinder. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - required: - - volumeID - type: object - configMap: - description: configMap represents a configMap - that should populate this volume - properties: - defaultMode: - description: |- - defaultMode is optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether the - ConfigMap or its keys must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - csi: - description: csi (Container Storage Interface) - represents ephemeral storage that is handled - by certain external CSI drivers (Beta feature). - properties: - driver: - description: |- - driver is the name of the CSI driver that handles this volume. - Consult with your admin for the correct name as registered in the cluster. - type: string - fsType: - description: |- - fsType to mount. Ex. "ext4", "xfs", "ntfs". - If not provided, the empty value is passed to the associated CSI driver - which will determine the default filesystem to apply. - type: string - nodePublishSecretRef: - description: |- - nodePublishSecretRef is a reference to the secret object containing - sensitive information to pass to the CSI driver to complete the CSI - NodePublishVolume and NodeUnpublishVolume calls. - This field is optional, and may be empty if no secret is required. 
If the - secret object contains more than one secret, all secret references are passed. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - readOnly: - description: |- - readOnly specifies a read-only configuration for the volume. - Defaults to false (read/write). - type: boolean - volumeAttributes: - additionalProperties: - type: string - description: |- - volumeAttributes stores driver-specific properties that are passed to the CSI - driver. Consult your driver's documentation for supported values. - type: object - required: - - driver - type: object - downwardAPI: - description: downwardAPI represents downward API - about the pod that should populate this volume - properties: - defaultMode: - description: |- - Optional: mode bits to use on created files by default. Must be a - Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: Items is a list of downward API - volume file - items: - description: DownwardAPIVolumeFile represents - information to create the file containing - the pod field - properties: - fieldRef: - description: 'Required: Selects a field - of the pod: only annotations, labels, - name, namespace and uid are supported.' - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". 
- type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path is the - relative path name of the file to - be created. Must not be absolute or - contain the ''..'' path. Must be utf-8 - encoded. The first item of the relative - path must not start with ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - emptyDir: - description: |- - emptyDir represents a temporary directory that shares a pod's lifetime. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - properties: - medium: - description: |- - medium represents what type of storage medium should back this directory. 
- The default is "" which means to use the node's default medium. - Must be an empty string (default) or Memory. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - type: string - sizeLimit: - anyOf: - - type: integer - - type: string - description: |- - sizeLimit is the total amount of local storage required for this EmptyDir volume. - The size limit is also applicable for memory medium. - The maximum usage on memory medium EmptyDir would be the minimum value between - the SizeLimit specified here and the sum of memory limits of all containers in a pod. - The default is nil which means that the limit is undefined. - More info: https://kubernetes. - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - ephemeral: - description: |- - ephemeral represents a volume that is handled by a cluster storage driver. - The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, - and deleted when the pod is removed. - properties: - volumeClaimTemplate: - description: |- - Will be used to create a stand-alone PVC to provision the volume. - The pod in which this EphemeralVolumeSource is embedded will be the - owner of the PVC, i.e. the PVC will be deleted together with the - pod. The name of the PVC will be `-` where - `` is the name from the `PodSpec.Volumes` array - entry. - properties: - metadata: - description: |- - May contain labels and annotations that will be copied into the PVC - when creating it. No other fields are allowed and will be rejected during - validation. 
- properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - The specification for the PersistentVolumeClaim. The entire content is - copied unchanged into the PVC that gets created from this - template. The same fields as in a PersistentVolumeClaim - are also valid here. - properties: - accessModes: - description: |- - accessModes contains the desired access modes the volume should have. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 - items: - type: string - type: array - x-kubernetes-list-type: atomic - dataSource: - description: |- - dataSource field can be used to specify either: - * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) - * An existing PVC (PersistentVolumeClaim) - If the provisioner or an external controller can support the specified data source, - it will create a new volume based on the contents of the specified data source. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - description: |- - dataSourceRef specifies the object from which to populate the volume with data, if a non-empty - volume is desired. This may be any object from a non-empty API group (non - core object) or a PersistentVolumeClaim object. 
- When this field is specified, volume binding will only succeed if the type of - the specified object matches some installed volume populator or dynamic - provisioner. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - namespace: - description: |- - Namespace is the namespace of resource being referenced - Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. - (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. - type: string - required: - - kind - - name - type: object - resources: - description: |- - resources represents the minimum resources the volume should have. - If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements - that are lower than previous value but must still be higher than capacity recorded in the - status field of the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. 
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - selector: - description: selector is a label query - over volumes to consider for binding. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - description: |- - storageClassName is the name of the StorageClass required by the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 - type: string - volumeAttributesClassName: - description: |- - volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. - If specified, the CSI driver will create or update the volume with the attributes defined - in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. - type: string - volumeMode: - description: |- - volumeMode defines what type of volume is required by the claim. - Value of Filesystem is implied when not included in claim spec. - type: string - volumeName: - description: volumeName is the binding - reference to the PersistentVolume - backing this claim. - type: string - type: object - required: - - spec - type: object - type: object - fc: - description: fc represents a Fibre Channel resource - that is attached to a kubelet's host machine - and then exposed to the pod. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - lun: - description: 'lun is Optional: FC target lun - number' - format: int32 - type: integer - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- type: boolean - targetWWNs: - description: 'targetWWNs is Optional: FC target - worldwide names (WWNs)' - items: - type: string - type: array - x-kubernetes-list-type: atomic - wwids: - description: |- - wwids Optional: FC volume world wide identifiers (wwids) - Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - flexVolume: - description: |- - flexVolume represents a generic volume resource that is - provisioned/attached using an exec based plugin. - properties: - driver: - description: driver is the name of the driver - to use for this volume. - type: string - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. - type: string - options: - additionalProperties: - type: string - description: 'options is Optional: this field - holds extra command options if any.' - type: object - readOnly: - description: |- - readOnly is Optional: defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef is Optional: secretRef is reference to the secret object containing - sensitive information to pass to the plugin scripts. This may be - empty if no secret object is specified. If the secret object - contains more than one secret, all secrets are passed to the plugin - scripts. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - required: - - driver - type: object - flocker: - description: flocker represents a Flocker volume - attached to a kubelet's host machine. This depends - on the Flocker control service being running - properties: - datasetName: - description: |- - datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker - should be considered as deprecated - type: string - datasetUUID: - description: datasetUUID is the UUID of the - dataset. This is unique identifier of a - Flocker dataset - type: string - type: object - gcePersistentDisk: - description: |- - gcePersistentDisk represents a GCE Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - properties: - fsType: - description: |- - fsType is filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - format: int32 - type: integer - pdName: - description: |- - pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: boolean - required: - - pdName - type: object - gitRepo: - description: |- - gitRepo represents a git repository at a particular revision. - DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an - EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir - into the Pod's container. - properties: - directory: - description: |- - directory is the target directory name. - Must not contain or start with '..'. If '.' is supplied, the volume directory will be the - git repository. Otherwise, if specified, the volume will contain the git repository in - the subdirectory with the given name. - type: string - repository: - description: repository is the URL - type: string - revision: - description: revision is the commit hash for - the specified revision. - type: string - required: - - repository - type: object - glusterfs: - description: |- - glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/glusterfs/README.md - properties: - endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - path: - description: |- - path is the Glusterfs volume path. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - readOnly: - description: |- - readOnly here will force the Glusterfs volume to be mounted with read-only permissions. - Defaults to false. 
- More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: boolean - required: - - endpoints - - path - type: object - hostPath: - description: |- - hostPath represents a pre-existing file or directory on the host - machine that is directly exposed to the container. This is generally - used for system agents or other privileged things that are allowed - to see the host machine. Most containers will NOT need this. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - properties: - path: - description: |- - path of the directory on the host. - If the path is a symlink, it will follow the link to the real path. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - type: - description: |- - type for HostPath Volume - Defaults to "" - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - required: - - path - type: object - image: - description: |- - image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. - The volume is resolved at pod startup depending on which PullPolicy value is provided: - - - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - - Never: the kubelet never pulls the reference and only uses a local image or artifact. - properties: - pullPolicy: - description: |- - Policy for pulling OCI objects. Possible values are: - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. - type: string - reference: - description: |- - Required: Image or artifact reference to be used. - Behaves in the same way as pod.spec.containers[*].image. 
- Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets. - More info: https://kubernetes. - type: string - type: object - iscsi: - description: |- - iscsi represents an ISCSI Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md - properties: - chapAuthDiscovery: - description: chapAuthDiscovery defines whether - support iSCSI Discovery CHAP authentication - type: boolean - chapAuthSession: - description: chapAuthSession defines whether - support iSCSI Session CHAP authentication - type: boolean - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi - type: string - initiatorName: - description: |- - initiatorName is the custom iSCSI Initiator Name. - If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface - : will be created for the connection. - type: string - iqn: - description: iqn is the target iSCSI Qualified - Name. - type: string - iscsiInterface: - default: default - description: |- - iscsiInterface is the interface Name that uses an iSCSI transport. - Defaults to 'default' (tcp). - type: string - lun: - description: lun represents iSCSI Target Lun - number. - format: int32 - type: integer - portals: - description: |- - portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - items: - type: string - type: array - x-kubernetes-list-type: atomic - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. 
- Defaults to false. - type: boolean - secretRef: - description: secretRef is the CHAP Secret - for iSCSI target and initiator authentication - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - targetPortal: - description: |- - targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - type: string - required: - - iqn - - lun - - targetPortal - type: object - name: - description: |- - name of the volume. - Must be a DNS_LABEL and unique within the pod. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - nfs: - description: |- - nfs represents an NFS mount on the host that shares a pod's lifetime - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - properties: - path: - description: |- - path that is exported by the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - readOnly: - description: |- - readOnly here will force the NFS export to be mounted with read-only permissions. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: boolean - server: - description: |- - server is the hostname or IP address of the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - required: - - path - - server - type: object - persistentVolumeClaim: - description: |- - persistentVolumeClaimVolumeSource represents a reference to a - PersistentVolumeClaim in the same namespace. 
- More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - properties: - claimName: - description: |- - claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - type: string - readOnly: - description: |- - readOnly Will force the ReadOnly setting in VolumeMounts. - Default false. - type: boolean - required: - - claimName - type: object - photonPersistentDisk: - description: photonPersistentDisk represents a - PhotonController persistent disk attached and - mounted on kubelets host machine - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - pdID: - description: pdID is the ID that identifies - Photon Controller persistent disk - type: string - required: - - pdID - type: object - portworxVolume: - description: portworxVolume represents a portworx - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fSType represents the filesystem type to mount - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - volumeID: - description: volumeID uniquely identifies - a Portworx volume - type: string - required: - - volumeID - type: object - projected: - description: projected items for all in one resources - secrets, configmaps, and downward API - properties: - defaultMode: - description: |- - defaultMode are the mode bits used to set permissions on created files by default. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Directories within the path are not affected by this setting. - format: int32 - type: integer - sources: - description: |- - sources is the list of volume projections. Each entry in this list - handles one source. - items: - description: |- - Projection that may be projected along with other supported volume types. - Exactly one of these fields must be set. - properties: - clusterTrustBundle: - description: |- - ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field - of ClusterTrustBundle objects in an auto-updating file. - - Alpha, gated by the ClusterTrustBundleProjection feature gate. - - ClusterTrustBundle objects can either be selected by name, or by the - combination of signer name and a label selector. - properties: - labelSelector: - description: |- - Select all ClusterTrustBundles that match this label selector. Only has - effect if signerName is set. Mutually-exclusive with name. If unset, - interpreted as "match nothing". If set but empty, interpreted as "match - everything". - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. 
This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - name: - description: |- - Select a single ClusterTrustBundle by object name. Mutually-exclusive - with signerName and labelSelector. - type: string - optional: - description: |- - If true, don't block pod startup if the referenced ClusterTrustBundle(s) - aren't available. If using name, then the named ClusterTrustBundle is - allowed not to exist. If using signerName, then the combination of - signerName and labelSelector is allowed to match zero - ClusterTrustBundles. - type: boolean - path: - description: Relative path from - the volume root to write the bundle. - type: string - signerName: - description: |- - Select all ClusterTrustBundles that match this signer name. - Mutually-exclusive with name. The contents of all selected - ClusterTrustBundles will be unified and deduplicated. - type: string - required: - - path - type: object - configMap: - description: configMap information about - the configMap data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key - to a path within a volume. 
- properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether - the ConfigMap or its keys must - be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - downwardAPI: - description: downwardAPI information - about the downwardAPI data to project - properties: - items: - description: Items is a list of - DownwardAPIVolume file - items: - description: DownwardAPIVolumeFile - represents information to create - the file containing the pod - field - properties: - fieldRef: - description: 'Required: Selects - a field of the pod: only - annotations, labels, name, - namespace and uid are supported.' - properties: - apiVersion: - description: Version of - the schema the FieldPath - is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the - field to select in the - specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path - is the relative path name - of the file to be created. - Must not be absolute or - contain the ''..'' path. - Must be utf-8 encoded. The - first item of the relative - path must not start with - ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container - name: required for volumes, - optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies - the output format of - the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: - resource to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - secret: - description: secret information about - the secret data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. 
- items: - description: Maps a string key - to a path within a volume. - properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional field specify - whether the Secret or its key - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - serviceAccountToken: - description: serviceAccountToken is - information about the serviceAccountToken - data to project - properties: - audience: - description: |- - audience is the intended audience of the token. A recipient of a token - must identify itself with an identifier specified in the audience of the - token, and otherwise should reject the token. The audience defaults to the - identifier of the apiserver. - type: string - expirationSeconds: - description: |- - expirationSeconds is the requested duration of validity of the service - account token. 
As the token approaches expiration, the kubelet volume - plugin will proactively rotate the service account token. The kubelet will - start trying to rotate the token if the token is older than 80 percent of - its time to live or if the token is older than 24 hours.Defaults to 1 hour - and must be at least 10 minutes. - format: int64 - type: integer - path: - description: |- - path is the path relative to the mount point of the file to project the - token into. - type: string - required: - - path - type: object - type: object - type: array - x-kubernetes-list-type: atomic - type: object - quobyte: - description: quobyte represents a Quobyte mount - on the host that shares a pod's lifetime - properties: - group: - description: |- - group to map volume access to - Default is no group - type: string - readOnly: - description: |- - readOnly here will force the Quobyte volume to be mounted with read-only permissions. - Defaults to false. - type: boolean - registry: - description: |- - registry represents a single or multiple Quobyte Registry services - specified as a string as host:port pair (multiple entries are separated with commas) - which acts as the central registry for volumes - type: string - tenant: - description: |- - tenant owning the given Quobyte volume in the Backend - Used with dynamically provisioned Quobyte volumes, value is set by the plugin - type: string - user: - description: |- - user to map volume access to - Defaults to serivceaccount user - type: string - volume: - description: volume is a string that references - an already created Quobyte volume by name. - type: string - required: - - registry - - volume - type: object - rbd: - description: |- - rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/rbd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. 
- Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd - type: string - image: - description: |- - image is the rados image name. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - keyring: - default: /etc/ceph/keyring - description: |- - keyring is the path to key ring for RBDUser. - Default is /etc/ceph/keyring. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - monitors: - description: |- - monitors is a collection of Ceph monitors. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - pool: - default: rbd - description: |- - pool is the rados pool name. - Default is rbd. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: boolean - secretRef: - description: |- - secretRef is name of the authentication secret for RBDUser. If provided - overrides keyring. - Default is nil. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - default: admin - description: |- - user is the rados user name. - Default is admin. 
- More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - required: - - image - - monitors - type: object - scaleIO: - description: scaleIO represents a ScaleIO persistent - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - default: xfs - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". - Default is "xfs". - type: string - gateway: - description: gateway is the host address of - the ScaleIO API Gateway. - type: string - protectionDomain: - description: protectionDomain is the name - of the ScaleIO Protection Domain for the - configured storage. - type: string - readOnly: - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef references to the secret for ScaleIO user and other - sensitive information. If this is not provided, Login operation will fail. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - sslEnabled: - description: sslEnabled Flag enable/disable - SSL communication with Gateway, default - false - type: boolean - storageMode: - default: ThinProvisioned - description: |- - storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. - Default is ThinProvisioned. - type: string - storagePool: - description: storagePool is the ScaleIO Storage - Pool associated with the protection domain. 
- type: string - system: - description: system is the name of the storage - system as configured in ScaleIO. - type: string - volumeName: - description: |- - volumeName is the name of a volume already created in the ScaleIO system - that is associated with this volume source. - type: string - required: - - gateway - - secretRef - - system - type: object - secret: - description: |- - secret represents a secret that should populate this volume. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - properties: - defaultMode: - description: |- - defaultMode is Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values - for mode bits. Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items If unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. 
- type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - optional: - description: optional field specify whether - the Secret or its keys must be defined - type: boolean - secretName: - description: |- - secretName is the name of the secret in the pod's namespace to use. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - type: string - type: object - storageos: - description: storageOS represents a StorageOS - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef specifies the secret to use for obtaining the StorageOS API - credentials. If not specified, default values will be attempted. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeName: - description: |- - volumeName is the human-readable name of the StorageOS volume. Volume - names are only unique within a namespace. - type: string - volumeNamespace: - description: |- - volumeNamespace specifies the scope of the volume within StorageOS. If no - namespace is specified then the Pod's namespace will be used. This allows the - Kubernetes name scoping to be mirrored within StorageOS for tighter integration. 
- Set VolumeName to any name to override the default behaviour. - Set to "default" if you are not using namespaces within StorageOS. - type: string - type: object - vsphereVolume: - description: vsphereVolume represents a vSphere - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fsType is filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - storagePolicyID: - description: storagePolicyID is the storage - Policy Based Management (SPBM) profile ID - associated with the StoragePolicyName. - type: string - storagePolicyName: - description: storagePolicyName is the storage - Policy Based Management (SPBM) profile name. - type: string - volumePath: - description: volumePath is the path that identifies - vSphere volume vmdk - type: string - required: - - volumePath - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - required: - - containers - type: object - type: object - type: object - description: |- - A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. - For example, - { - "Master": PyTorchReplicaSpec, - "Worker": PyTorchReplicaSpec, - } - type: object - runPolicy: - description: |- - RunPolicy encapsulates various runtime policies of the distributed training - job, for example how to clean up resources and how long the job can stay - active. - properties: - activeDeadlineSeconds: - description: |- - Specifies the duration in seconds relative to the startTime that the job may be active - before the system tries to terminate it; value must be positive integer. - format: int64 - type: integer - backoffLimit: - description: Optional number of retries before marking this job - failed. 
- format: int32 - type: integer - cleanPodPolicy: - description: |- - CleanPodPolicy defines the policy to kill pods after the job completes. - Default to None. - type: string - managedBy: - description: |- - ManagedBy is used to indicate the controller or entity that manages a job. - The value must be either an empty, 'kubeflow.org/training-operator' or - 'kueue.x-k8s.io/multikueue'. - The training-operator reconciles a job which doesn't have this - field at all or the field value is the reserved string - 'kubeflow.org/training-operator', but delegates reconciling the job - with 'kueue.x-k8s. - type: string - schedulingPolicy: - description: SchedulingPolicy defines the policy related to scheduling, - e.g. gang-scheduling - properties: - minAvailable: - format: int32 - type: integer - minResources: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - priorityClass: - type: string - queue: - type: string - x-kubernetes-validations: - - message: spec.runPolicy.schedulingPolicy.queue is immutable - rule: self == oldSelf - scheduleTimeoutSeconds: - format: int32 - type: integer - type: object - suspend: - default: false - description: |- - suspend specifies whether the Job controller should create Pods or not. - If a Job is created with suspend set to true, no Pods are created by - the Job controller. If a Job is suspended after creation (i.e. the - flag goes from false to true), the Job controller will delete all - active Pods and PodGroups associated with this Job. - Users must design their workload to gracefully handle this. - type: boolean - ttlSecondsAfterFinished: - description: |- - TTLSecondsAfterFinished is the TTL to clean up jobs. - It may take extra ReconcilePeriod seconds for the cleanup, since - reconcile gets called periodically. - Default to infinite. 
- format: int32 - type: integer - type: object - required: - - pytorchReplicaSpecs - type: object - status: - description: |- - Most recently observed status of the PyTorchJob. - Read-only (modified by the system). - properties: - completionTime: - description: |- - Represents time when the job was completed. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - conditions: - description: Conditions is an array of current observed job conditions. - items: - description: JobCondition describes the state of the job at a certain - point. - properties: - lastTransitionTime: - description: Last time the condition transitioned from one status - to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human readable message indicating details about - the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Type of job condition. - type: string - required: - - status - - type - type: object - type: array - lastReconcileTime: - description: |- - Represents last time when the job was reconciled. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - replicaStatuses: - additionalProperties: - description: ReplicaStatus represents the current observed state - of the replica. - properties: - active: - description: The number of actively running pods. - format: int32 - type: integer - failed: - description: The number of pods which reached phase Failed. 
- format: int32 - type: integer - labelSelector: - description: 'Deprecated: Use Selector instead' - properties: - matchExpressions: - description: matchExpressions is a list of label selector - requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - selector: - description: |- - A Selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty Selector matches all objects. A null - Selector matches no objects. - type: string - succeeded: - description: The number of pods which reached phase Succeeded. - format: int32 - type: integer - type: object - description: |- - ReplicaStatuses is map of ReplicaType and ReplicaStatus, - specifies the status of each replica. 
- type: object - startTime: - description: |- - Represents time when the job was acknowledged by the job controller. - It is not guaranteed to be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - type: object - type: object - served: true - storage: true - subresources: - scale: - labelSelectorPath: .status.replicaStatuses.Worker.selector - specReplicasPath: .spec.pytorchReplicaSpecs.Worker.replicas - statusReplicasPath: .status.replicaStatuses.Worker.active - status: {} diff --git a/manifests/base/crds/kubeflow.org_tfjobs.yaml b/manifests/base/crds/kubeflow.org_tfjobs.yaml deleted file mode 100644 index a5e75eea9e..0000000000 --- a/manifests/base/crds/kubeflow.org_tfjobs.yaml +++ /dev/null @@ -1,7907 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.5 - name: tfjobs.kubeflow.org -spec: - group: kubeflow.org - names: - kind: TFJob - listKind: TFJobList - plural: tfjobs - singular: tfjob - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .status.conditions[-1:].type - name: State - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1 - schema: - openAPIV3Schema: - description: TFJob represents a TFJob resource. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of the desired state of the TFJob. - properties: - enableDynamicWorker: - description: A switch to enable dynamic worker - type: boolean - runPolicy: - description: |- - RunPolicy encapsulates various runtime policies of the distributed training - job, for example how to clean up resources and how long the job can stay - active. - properties: - activeDeadlineSeconds: - description: |- - Specifies the duration in seconds relative to the startTime that the job may be active - before the system tries to terminate it; value must be positive integer. - format: int64 - type: integer - backoffLimit: - description: Optional number of retries before marking this job - failed. - format: int32 - type: integer - cleanPodPolicy: - description: |- - CleanPodPolicy defines the policy to kill pods after the job completes. - Default to None. - type: string - managedBy: - description: |- - ManagedBy is used to indicate the controller or entity that manages a job. - The value must be either an empty, 'kubeflow.org/training-operator' or - 'kueue.x-k8s.io/multikueue'. - The training-operator reconciles a job which doesn't have this - field at all or the field value is the reserved string - 'kubeflow.org/training-operator', but delegates reconciling the job - with 'kueue.x-k8s. - type: string - schedulingPolicy: - description: SchedulingPolicy defines the policy related to scheduling, - e.g. 
gang-scheduling - properties: - minAvailable: - format: int32 - type: integer - minResources: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - priorityClass: - type: string - queue: - type: string - x-kubernetes-validations: - - message: spec.runPolicy.schedulingPolicy.queue is immutable - rule: self == oldSelf - scheduleTimeoutSeconds: - format: int32 - type: integer - type: object - suspend: - default: false - description: |- - suspend specifies whether the Job controller should create Pods or not. - If a Job is created with suspend set to true, no Pods are created by - the Job controller. If a Job is suspended after creation (i.e. the - flag goes from false to true), the Job controller will delete all - active Pods and PodGroups associated with this Job. - Users must design their workload to gracefully handle this. - type: boolean - ttlSecondsAfterFinished: - description: |- - TTLSecondsAfterFinished is the TTL to clean up jobs. - It may take extra ReconcilePeriod seconds for the cleanup, since - reconcile gets called periodically. - Default to infinite. - format: int32 - type: integer - type: object - successPolicy: - description: |- - SuccessPolicy defines the policy to mark the TFJob as succeeded. - Default to "", using the default rules. - type: string - tfReplicaSpecs: - additionalProperties: - description: ReplicaSpec is a description of the replica - properties: - replicas: - description: |- - Replicas is the desired number of replicas of the given template. - If unspecified, defaults to 1. - format: int32 - type: integer - restartPolicy: - description: |- - Restart policy for all replicas within the job. - One of Always, OnFailure, Never and ExitCode. - Default to Never. 
- type: string - template: - description: |- - Template is the object that describes the pod that - will be created for this replica. RestartPolicy in PodTemplateSpec - will be overide by RestartPolicy in ReplicaSpec - properties: - metadata: - description: |- - Standard object's metadata. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - Specification of the desired behavior of the pod. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status - properties: - activeDeadlineSeconds: - description: |- - Optional duration in seconds the pod may be active on the node relative to - StartTime before the system will actively try to mark it failed and kill associated containers. - Value must be a positive integer. - format: int64 - type: integer - affinity: - description: If specified, the pod's scheduling constraints - properties: - nodeAffinity: - description: Describes node affinity scheduling - rules for the pod. - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: |- - An empty preferred scheduling term matches all objects with implicit weight 0 - (i.e. it's a no-op). 
A null preferred scheduling term matches no objects (i.e. is also a no-op). - properties: - preference: - description: A node selector term, associated - with the corresponding weight. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. 
If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - weight: - description: Weight associated with matching - the corresponding nodeSelectorTerm, - in the range 1-100. - format: int32 - type: integer - required: - - preference - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to an update), the system - may or may not try to eventually evict the pod from its node. - properties: - nodeSelectorTerms: - description: Required. A list of node selector - terms. The terms are ORed. - items: - description: |- - A null or empty node selector term matches no objects. The requirements of - them are ANDed. - The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. 
If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-type: atomic - required: - - nodeSelectorTerms - type: object - x-kubernetes-map-type: atomic - type: object - podAffinity: - description: Describes pod affinity scheduling rules - (e.g. co-locate this pod in the same node, zone, - etc. as some other pod(s)). 
- properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. 
- properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. 
- items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. 
The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - podAntiAffinity: - description: Describes pod anti-affinity scheduling - rules (e.g. avoid putting this pod in the same - node, zone, etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the anti-affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. 
- properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the anti-affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the anti-affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. 
due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. 
- type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. 
- type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - type: object - automountServiceAccountToken: - description: AutomountServiceAccountToken indicates - whether a service account token should be automatically - mounted. 
- type: boolean - containers: - description: |- - List of containers belonging to the pod. - Containers cannot currently be added or removed. - There must be at least one container in a Pod. - Cannot be updated. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". 
- type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. 
- More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. 
- format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. 
- format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. 
- format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. 
- For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. 
- x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. 
- type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - dnsConfig: - description: |- - Specifies the DNS parameters of a pod. - Parameters specified here will be merged to the generated DNS - configuration based on DNSPolicy. - properties: - nameservers: - description: |- - A list of DNS name server IP addresses. - This will be appended to the base nameservers generated from DNSPolicy. - Duplicated nameservers will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - options: - description: |- - A list of DNS resolver options. - This will be merged with the base options generated from DNSPolicy. - Duplicated entries will be removed. Resolution options given in Options - will override those that appear in the base DNSPolicy. 
- items: - description: PodDNSConfigOption defines DNS resolver - options of a pod. - properties: - name: - description: Required. - type: string - value: - type: string - type: object - type: array - x-kubernetes-list-type: atomic - searches: - description: |- - A list of DNS search domains for host-name lookup. - This will be appended to the base search paths generated from DNSPolicy. - Duplicated search paths will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - dnsPolicy: - description: |- - Set DNS policy for the pod. - Defaults to "ClusterFirst". - Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. - DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. - To have DNS options set along with hostNetwork, you have to specify DNS policy - explicitly to 'ClusterFirstWithHostNet'. - type: string - enableServiceLinks: - description: |- - EnableServiceLinks indicates whether information about services should be injected into pod's - environment variables, matching the syntax of Docker links. - Optional: Defaults to true. - type: boolean - ephemeralContainers: - description: |- - List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing - pod to perform user-initiated actions such as debugging. This list cannot be specified when - creating a pod, and it cannot be modified by updating the pod spec. In order to add an - ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. - items: - description: |- - An EphemeralContainer is a temporary container that you may add to an existing Pod for - user-initiated activities such as debugging. Ephemeral containers have no resource or - scheduling guarantees, and they will not be restarted when they exit or when a Pod is - removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the - Pod to exceed its resource allocation. 
- properties: - args: - description: |- - Arguments to the entrypoint. - The image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. 
- type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: Lifecycle is not allowed for ephemeral - containers. 
- properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. 
- Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. 
- properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. 
- properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the ephemeral container specified as a DNS_LABEL. - This name must be unique among all containers, init containers and ephemeral containers. - type: string - ports: - description: Ports are not allowed for ephemeral - containers. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. 
Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. 
You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources - already allocated to the pod. - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. 
- - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - Restart policy for the container to manage the restart behavior of each - container within a pod. 
- This may only be set for init containers. You cannot set this field on - ephemeral containers. - type: string - securityContext: - description: |- - Optional: SecurityContext defines the security options the ephemeral container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - targetContainerName: - description: |- - If set, the name of the container from PodSpec that this ephemeral container targets. - The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. - If not set then the ephemeral container uses the namespaces configured in the Pod spec. - - The container runtime must implement support for this feature. - type: string - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. 
- type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). 
- type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. - - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - hostAliases: - description: |- - HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts - file if specified. - items: - description: |- - HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the - pod's hosts file. 
- properties: - hostnames: - description: Hostnames for the above IP address. - items: - type: string - type: array - x-kubernetes-list-type: atomic - ip: - description: IP address of the host file entry. - type: string - required: - - ip - type: object - type: array - x-kubernetes-list-map-keys: - - ip - x-kubernetes-list-type: map - hostIPC: - description: |- - Use the host's ipc namespace. - Optional: Default to false. - type: boolean - hostNetwork: - description: |- - Host networking requested for this pod. Use the host's network namespace. - If this option is set, the ports that will be used must be specified. - Default to false. - type: boolean - hostPID: - description: |- - Use the host's pid namespace. - Optional: Default to false. - type: boolean - hostUsers: - description: |- - Use the host's user namespace. - Optional: Default to true. - If set to true or not present, the pod will be run in the host user namespace, useful - for when the pod needs a feature only available to the host user namespace, such as - loading a kernel module with CAP_SYS_MODULE. - When set to false, a new userns is created for the pod. - type: boolean - hostname: - description: |- - Specifies the hostname of the Pod - If not specified, the pod's hostname will be set to a system-defined value. - type: string - imagePullSecrets: - description: |- - ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. - If specified, these secrets will be passed to individual puller implementations for them to use. - More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod - items: - description: |- - LocalObjectReference contains enough information to let you locate the - referenced object inside the same namespace. - properties: - name: - default: "" - description: |- - Name of the referent. 
- This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - initContainers: - description: |- - List of initialization containers belonging to the pod. - Init containers are executed in order prior to containers being started. If any - init container fails, the pod is considered to have failed and is handled according - to its restartPolicy. The name for an init container or normal container must be - unique among all containers. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. 
- items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. 
All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. 
- type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. 
The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. 
- type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. 
Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. 
Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. - For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. 
- type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - nodeName: - description: |- - NodeName indicates in which node this pod is scheduled. - If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName. - Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod. - This field should not be used to express a desire for the pod to be scheduled on a specific node. - https://kubernetes. - type: string - nodeSelector: - additionalProperties: - type: string - description: |- - NodeSelector is a selector which must be true for the pod to fit on a node. - Selector which must match a node's labels for the pod to be scheduled on that node. 
- More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ - type: object - x-kubernetes-map-type: atomic - os: - description: |- - Specifies the OS of the containers in the pod. - Some pod and container fields are restricted if this is set. - - If the OS field is set to linux, the following fields must be unset: - -securityContext.windowsOptions - - If the OS field is set to windows, following fields must be unset: - - spec.hostPID - - spec.hostIPC - - spec.hostUsers - - spec.securityContext.appArmorProfile - - spec.securityContext. - properties: - name: - description: |- - Name is the name of the operating system. The currently supported values are linux and windows. - Additional value may be defined in future and can be one of: - https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration - Clients should expect to handle additional values and treat unrecognized values in this field as os: null - type: string - required: - - name - type: object - overhead: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. - This field will be autopopulated at admission time by the RuntimeClass admission controller. If - the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. - The RuntimeClass admission controller will reject Pod create requests which have the overhead already - set. - type: object - preemptionPolicy: - description: |- - PreemptionPolicy is the Policy for preempting pods with lower priority. - One of Never, PreemptLowerPriority. - Defaults to PreemptLowerPriority if unset. - type: string - priority: - description: |- - The priority value. 
Various system components use this field to find the - priority of the pod. When Priority Admission Controller is enabled, it - prevents users from setting this field. The admission controller populates - this field from PriorityClassName. - The higher the value, the higher the priority. - format: int32 - type: integer - priorityClassName: - description: |- - If specified, indicates the pod's priority. "system-node-critical" and - "system-cluster-critical" are two special keywords which indicate the - highest priorities with the former being the highest priority. Any other - name must be defined by creating a PriorityClass object with that name. - If not specified, the pod priority will be default or zero if there is no - default. - type: string - readinessGates: - description: |- - If specified, all readiness gates will be evaluated for pod readiness. - A pod is ready when all its containers are ready AND - all conditions specified in the readiness gates have status equal to "True" - More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates - items: - description: PodReadinessGate contains the reference - to a pod condition - properties: - conditionType: - description: ConditionType refers to a condition - in the pod's condition list with matching type. - type: string - required: - - conditionType - type: object - type: array - x-kubernetes-list-type: atomic - resourceClaims: - description: |- - ResourceClaims defines which ResourceClaims must be allocated - and reserved before the Pod is allowed to start. The resources - will be made available to those containers which consume them - by name. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. - items: - description: |- - PodResourceClaim references exactly one ResourceClaim, either directly - or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim - for the pod. 
- - It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. - Containers that need access to the ResourceClaim reference it with this name. - properties: - name: - description: |- - Name uniquely identifies this resource claim inside the pod. - This must be a DNS_LABEL. - type: string - resourceClaimName: - description: |- - ResourceClaimName is the name of a ResourceClaim object in the same - namespace as this pod. - - Exactly one of ResourceClaimName and ResourceClaimTemplateName must - be set. - type: string - resourceClaimTemplateName: - description: |- - ResourceClaimTemplateName is the name of a ResourceClaimTemplate - object in the same namespace as this pod. - - The template will be used to create a new ResourceClaim, which will - be bound to this pod. When this pod is deleted, the ResourceClaim - will also be deleted. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - restartPolicy: - description: |- - Restart policy for all containers within the pod. - One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. - Default to Always. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy - type: string - runtimeClassName: - description: |- - RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used - to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. - If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an - empty definition that uses the default runtime handler. - More info: https://git.k8s. - type: string - schedulerName: - description: |- - If specified, the pod will be dispatched by specified scheduler. - If not specified, the pod will be dispatched by default scheduler. 
- type: string - schedulingGates: - description: |- - SchedulingGates is an opaque list of values that if specified will block scheduling the pod. - If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the - scheduler will not attempt to schedule the pod. - - SchedulingGates can only be set at pod creation time, and be removed only afterwards. - items: - description: PodSchedulingGate is associated to a - Pod to guard its scheduling. - properties: - name: - description: |- - Name of the scheduling gate. - Each scheduling gate must have a unique name field. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - securityContext: - description: |- - SecurityContext holds pod-level security attributes and common container settings. - Optional: Defaults to empty. See type description for default values of each field. - properties: - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - fsGroup: - description: |- - A special supplemental group that applies to all containers in a pod. - Some volume types allow the Kubelet to change the ownership of that volume - to be owned by the pod: - - 1. The owning GID will be the FSGroup - 2. 
The setgid bit is set (new files created in the volume will be owned by FSGroup) - 3. - format: int64 - type: integer - fsGroupChangePolicy: - description: |- - fsGroupChangePolicy defines behavior of changing ownership and permission of the volume - before being exposed inside Pod. This field will only apply to - volume types which support fsGroup based ownership(and permissions). - It will have no effect on ephemeral volume types such as: secret, configmaps - and emptydir. - Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. - type: string - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in SecurityContext. - type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to all containers. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in SecurityContext. 
If set in - both SecurityContext and PodSecurityContext, the value specified in SecurityContext - takes precedence for that container. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that - applies to the container. - type: string - role: - description: Role is a SELinux role label that - applies to the container. - type: string - type: - description: Type is a SELinux type label that - applies to the container. - type: string - user: - description: User is a SELinux user label that - applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. - Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - supplementalGroups: - description: |- - A list of groups applied to the first process run in each container, in - addition to the container's primary GID and fsGroup (if specified). If - the SupplementalGroupsPolicy feature is enabled, the - supplementalGroupsPolicy field determines whether these are in addition - to or instead of any group memberships defined in the container image. 
- items: - format: int64 - type: integer - type: array - x-kubernetes-list-type: atomic - supplementalGroupsPolicy: - description: |- - Defines how supplemental groups of the first container processes are calculated. - Valid values are "Merge" and "Strict". If not specified, "Merge" is used. - (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled - and the container runtime must implement support for this feature. - Note that this field cannot be set when spec.os.name is windows. - type: string - sysctls: - description: |- - Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported - sysctls (by the container runtime) might fail to launch. - Note that this field cannot be set when spec.os.name is windows. - items: - description: Sysctl defines a kernel parameter - to be set - properties: - name: - description: Name of a property to set - type: string - value: - description: Value of a property to set - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options within a container's SecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. 
- All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - serviceAccount: - description: |- - DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. - Deprecated: Use serviceAccountName instead. - type: string - serviceAccountName: - description: |- - ServiceAccountName is the name of the ServiceAccount to use to run this pod. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ - type: string - setHostnameAsFQDN: - description: |- - If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). - In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). - type: boolean - shareProcessNamespace: - description: |- - Share a single process namespace between all of the containers in a pod. - When this is set containers will be able to view and signal processes from other containers - in the same pod, and the first process in each container will not be assigned PID 1. - HostPID and ShareProcessNamespace cannot both be set. - Optional: Default to false. - type: boolean - subdomain: - description: |- - If specified, the fully qualified Pod hostname will be "...svc.". - If not specified, the pod will not have a domainname at all. 
- type: string - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - If this value is nil, the default grace period will be used instead. - format: int64 - type: integer - tolerations: - description: If specified, the pod's tolerations. - items: - description: |- - The pod this Toleration is attached to tolerates any taint that matches - the triple using the matching operator . - properties: - effect: - description: |- - Effect indicates the taint effect to match. Empty means match all taint effects. - When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: |- - Key is the taint key that the toleration applies to. Empty means match all taint keys. - If the key is empty, operator must be Exists; this combination means to match all values and all keys. - type: string - operator: - description: |- - Operator represents a key's relationship to the value. - Valid operators are Exists and Equal. Defaults to Equal. - Exists is equivalent to wildcard for value, so that a pod can - tolerate all taints of a particular category. - type: string - tolerationSeconds: - description: |- - TolerationSeconds represents the period of time the toleration (which must be - of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, - it is not set, which means tolerate the taint forever (do not evict). Zero and - negative values will be treated as 0 (evict immediately) by the system. - format: int64 - type: integer - value: - description: |- - Value is the taint value the toleration matches to. - If the operator is Exists, the value should be empty, otherwise just a regular string. 
- type: string - type: object - type: array - x-kubernetes-list-type: atomic - topologySpreadConstraints: - description: |- - TopologySpreadConstraints describes how a group of pods ought to spread across topology - domains. Scheduler will schedule pods in a way which abides by the constraints. - All topologySpreadConstraints are ANDed. - items: - description: TopologySpreadConstraint specifies how - to spread matching pods among the given topology. - properties: - labelSelector: - description: |- - LabelSelector is used to find matching pods. - Pods that match this label selector are counted to determine the number of pods - in their corresponding topology domain. - properties: - matchExpressions: - description: matchExpressions is a list of - label selector requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". 
The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select the pods over which - spreading will be calculated. The keys are used to lookup values from the - incoming pod labels, those key-value labels are ANDed with labelSelector - to select the group of existing pods over which spreading will be calculated - for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. - items: - type: string - type: array - x-kubernetes-list-type: atomic - maxSkew: - description: |- - MaxSkew describes the degree to which pods may be unevenly distributed. - When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference - between the number of matching pods in the target topology and the global minimum. - The global minimum is the minimum number of matching pods in an eligible domain - or zero if the number of eligible domains is less than MinDomains. - format: int32 - type: integer - minDomains: - description: |- - MinDomains indicates a minimum number of eligible domains. - When the number of eligible domains with matching topology keys is less than minDomains, - Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. - And when the number of eligible domains with matching topology keys equals or greater than minDomains, - this value has no effect on scheduling. - format: int32 - type: integer - nodeAffinityPolicy: - description: |- - NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector - when calculating pod topology spread skew. Options are: - - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. - - If this value is nil, the behavior is equivalent to the Honor policy. 
- type: string - nodeTaintsPolicy: - description: |- - NodeTaintsPolicy indicates how we will treat node taints when calculating - pod topology spread skew. Options are: - - Honor: nodes without taints, along with tainted nodes for which the incoming pod - has a toleration, are included. - - Ignore: node taints are ignored. All nodes are included. - - If this value is nil, the behavior is equivalent to the Ignore policy. - type: string - topologyKey: - description: |- - TopologyKey is the key of node labels. Nodes that have a label with this key - and identical values are considered to be in the same topology. - We consider each as a "bucket", and try to put balanced number - of pods into each bucket. - We define a domain as a particular instance of a topology. - type: string - whenUnsatisfiable: - description: |- - WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy - the spread constraint. - - DoNotSchedule (default) tells the scheduler not to schedule it. - - ScheduleAnyway tells the scheduler to schedule the pod in any location, - but giving higher precedence to topologies that would help reduce the - skew. - type: string - required: - - maxSkew - - topologyKey - - whenUnsatisfiable - type: object - type: array - x-kubernetes-list-map-keys: - - topologyKey - - whenUnsatisfiable - x-kubernetes-list-type: map - volumes: - description: |- - List of volumes that can be mounted by containers belonging to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes - items: - description: Volume represents a named volume in a - pod that may be accessed by any container in the - pod. - properties: - awsElasticBlockStore: - description: |- - awsElasticBlockStore represents an AWS Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - format: int32 - type: integer - readOnly: - description: |- - readOnly value true will force the readOnly setting in VolumeMounts. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: boolean - volumeID: - description: |- - volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - required: - - volumeID - type: object - azureDisk: - description: azureDisk represents an Azure Data - Disk mount on the host and bind mount to the - pod. - properties: - cachingMode: - description: 'cachingMode is the Host Caching - mode: None, Read Only, Read Write.' - type: string - diskName: - description: diskName is the Name of the data - disk in the blob storage - type: string - diskURI: - description: diskURI is the URI of data disk - in the blob storage - type: string - fsType: - default: ext4 - description: |- - fsType is Filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
- type: string - kind: - description: 'kind expected values are Shared: - multiple blob disks per storage account Dedicated: - single blob disk per storage account Managed: - azure managed data disk (only in managed - availability set). defaults to shared' - type: string - readOnly: - default: false - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - required: - - diskName - - diskURI - type: object - azureFile: - description: azureFile represents an Azure File - Service mount on the host and bind mount to - the pod. - properties: - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretName: - description: secretName is the name of secret - that contains Azure Storage Account Name - and Key - type: string - shareName: - description: shareName is the azure share - Name - type: string - required: - - secretName - - shareName - type: object - cephfs: - description: cephFS represents a Ceph FS mount - on the host that shares a pod's lifetime - properties: - monitors: - description: |- - monitors is Required: Monitors is a collection of Ceph monitors - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - path: - description: 'path is Optional: Used as the - mounted root, rather than the full Ceph - tree, default is /' - type: string - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: boolean - secretFile: - description: |- - secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - secretRef: - description: |- - secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - description: |- - user is optional: User is the rados user name, default is admin - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - required: - - monitors - type: object - cinder: - description: |- - cinder represents a cinder volume attached and mounted on kubelets host machine. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: boolean - secretRef: - description: |- - secretRef is optional: points to a secret object containing parameters used to connect - to OpenStack. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeID: - description: |- - volumeID used to identify the volume in cinder. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - required: - - volumeID - type: object - configMap: - description: configMap represents a configMap - that should populate this volume - properties: - defaultMode: - description: |- - defaultMode is optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether the - ConfigMap or its keys must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - csi: - description: csi (Container Storage Interface) - represents ephemeral storage that is handled - by certain external CSI drivers (Beta feature). - properties: - driver: - description: |- - driver is the name of the CSI driver that handles this volume. - Consult with your admin for the correct name as registered in the cluster. - type: string - fsType: - description: |- - fsType to mount. Ex. "ext4", "xfs", "ntfs". - If not provided, the empty value is passed to the associated CSI driver - which will determine the default filesystem to apply. - type: string - nodePublishSecretRef: - description: |- - nodePublishSecretRef is a reference to the secret object containing - sensitive information to pass to the CSI driver to complete the CSI - NodePublishVolume and NodeUnpublishVolume calls. - This field is optional, and may be empty if no secret is required. 
If the - secret object contains more than one secret, all secret references are passed. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - readOnly: - description: |- - readOnly specifies a read-only configuration for the volume. - Defaults to false (read/write). - type: boolean - volumeAttributes: - additionalProperties: - type: string - description: |- - volumeAttributes stores driver-specific properties that are passed to the CSI - driver. Consult your driver's documentation for supported values. - type: object - required: - - driver - type: object - downwardAPI: - description: downwardAPI represents downward API - about the pod that should populate this volume - properties: - defaultMode: - description: |- - Optional: mode bits to use on created files by default. Must be a - Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: Items is a list of downward API - volume file - items: - description: DownwardAPIVolumeFile represents - information to create the file containing - the pod field - properties: - fieldRef: - description: 'Required: Selects a field - of the pod: only annotations, labels, - name, namespace and uid are supported.' - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". 
- type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path is the - relative path name of the file to - be created. Must not be absolute or - contain the ''..'' path. Must be utf-8 - encoded. The first item of the relative - path must not start with ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - emptyDir: - description: |- - emptyDir represents a temporary directory that shares a pod's lifetime. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - properties: - medium: - description: |- - medium represents what type of storage medium should back this directory. 
- The default is "" which means to use the node's default medium. - Must be an empty string (default) or Memory. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - type: string - sizeLimit: - anyOf: - - type: integer - - type: string - description: |- - sizeLimit is the total amount of local storage required for this EmptyDir volume. - The size limit is also applicable for memory medium. - The maximum usage on memory medium EmptyDir would be the minimum value between - the SizeLimit specified here and the sum of memory limits of all containers in a pod. - The default is nil which means that the limit is undefined. - More info: https://kubernetes. - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - ephemeral: - description: |- - ephemeral represents a volume that is handled by a cluster storage driver. - The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, - and deleted when the pod is removed. - properties: - volumeClaimTemplate: - description: |- - Will be used to create a stand-alone PVC to provision the volume. - The pod in which this EphemeralVolumeSource is embedded will be the - owner of the PVC, i.e. the PVC will be deleted together with the - pod. The name of the PVC will be `-` where - `` is the name from the `PodSpec.Volumes` array - entry. - properties: - metadata: - description: |- - May contain labels and annotations that will be copied into the PVC - when creating it. No other fields are allowed and will be rejected during - validation. 
- properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - The specification for the PersistentVolumeClaim. The entire content is - copied unchanged into the PVC that gets created from this - template. The same fields as in a PersistentVolumeClaim - are also valid here. - properties: - accessModes: - description: |- - accessModes contains the desired access modes the volume should have. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 - items: - type: string - type: array - x-kubernetes-list-type: atomic - dataSource: - description: |- - dataSource field can be used to specify either: - * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) - * An existing PVC (PersistentVolumeClaim) - If the provisioner or an external controller can support the specified data source, - it will create a new volume based on the contents of the specified data source. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - description: |- - dataSourceRef specifies the object from which to populate the volume with data, if a non-empty - volume is desired. This may be any object from a non-empty API group (non - core object) or a PersistentVolumeClaim object. 
- When this field is specified, volume binding will only succeed if the type of - the specified object matches some installed volume populator or dynamic - provisioner. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - namespace: - description: |- - Namespace is the namespace of resource being referenced - Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. - (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. - type: string - required: - - kind - - name - type: object - resources: - description: |- - resources represents the minimum resources the volume should have. - If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements - that are lower than previous value but must still be higher than capacity recorded in the - status field of the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. 
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - selector: - description: selector is a label query - over volumes to consider for binding. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - description: |- - storageClassName is the name of the StorageClass required by the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 - type: string - volumeAttributesClassName: - description: |- - volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. - If specified, the CSI driver will create or update the volume with the attributes defined - in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. - type: string - volumeMode: - description: |- - volumeMode defines what type of volume is required by the claim. - Value of Filesystem is implied when not included in claim spec. - type: string - volumeName: - description: volumeName is the binding - reference to the PersistentVolume - backing this claim. - type: string - type: object - required: - - spec - type: object - type: object - fc: - description: fc represents a Fibre Channel resource - that is attached to a kubelet's host machine - and then exposed to the pod. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - lun: - description: 'lun is Optional: FC target lun - number' - format: int32 - type: integer - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- type: boolean - targetWWNs: - description: 'targetWWNs is Optional: FC target - worldwide names (WWNs)' - items: - type: string - type: array - x-kubernetes-list-type: atomic - wwids: - description: |- - wwids Optional: FC volume world wide identifiers (wwids) - Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - flexVolume: - description: |- - flexVolume represents a generic volume resource that is - provisioned/attached using an exec based plugin. - properties: - driver: - description: driver is the name of the driver - to use for this volume. - type: string - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. - type: string - options: - additionalProperties: - type: string - description: 'options is Optional: this field - holds extra command options if any.' - type: object - readOnly: - description: |- - readOnly is Optional: defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef is Optional: secretRef is reference to the secret object containing - sensitive information to pass to the plugin scripts. This may be - empty if no secret object is specified. If the secret object - contains more than one secret, all secrets are passed to the plugin - scripts. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - required: - - driver - type: object - flocker: - description: flocker represents a Flocker volume - attached to a kubelet's host machine. This depends - on the Flocker control service being running - properties: - datasetName: - description: |- - datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker - should be considered as deprecated - type: string - datasetUUID: - description: datasetUUID is the UUID of the - dataset. This is unique identifier of a - Flocker dataset - type: string - type: object - gcePersistentDisk: - description: |- - gcePersistentDisk represents a GCE Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - properties: - fsType: - description: |- - fsType is filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - format: int32 - type: integer - pdName: - description: |- - pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: boolean - required: - - pdName - type: object - gitRepo: - description: |- - gitRepo represents a git repository at a particular revision. - DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an - EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir - into the Pod's container. - properties: - directory: - description: |- - directory is the target directory name. - Must not contain or start with '..'. If '.' is supplied, the volume directory will be the - git repository. Otherwise, if specified, the volume will contain the git repository in - the subdirectory with the given name. - type: string - repository: - description: repository is the URL - type: string - revision: - description: revision is the commit hash for - the specified revision. - type: string - required: - - repository - type: object - glusterfs: - description: |- - glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/glusterfs/README.md - properties: - endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - path: - description: |- - path is the Glusterfs volume path. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - readOnly: - description: |- - readOnly here will force the Glusterfs volume to be mounted with read-only permissions. - Defaults to false. 
- More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: boolean - required: - - endpoints - - path - type: object - hostPath: - description: |- - hostPath represents a pre-existing file or directory on the host - machine that is directly exposed to the container. This is generally - used for system agents or other privileged things that are allowed - to see the host machine. Most containers will NOT need this. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - properties: - path: - description: |- - path of the directory on the host. - If the path is a symlink, it will follow the link to the real path. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - type: - description: |- - type for HostPath Volume - Defaults to "" - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - required: - - path - type: object - image: - description: |- - image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. - The volume is resolved at pod startup depending on which PullPolicy value is provided: - - - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - - Never: the kubelet never pulls the reference and only uses a local image or artifact. - properties: - pullPolicy: - description: |- - Policy for pulling OCI objects. Possible values are: - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. - type: string - reference: - description: |- - Required: Image or artifact reference to be used. - Behaves in the same way as pod.spec.containers[*].image. 
- Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets. - More info: https://kubernetes. - type: string - type: object - iscsi: - description: |- - iscsi represents an ISCSI Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md - properties: - chapAuthDiscovery: - description: chapAuthDiscovery defines whether - support iSCSI Discovery CHAP authentication - type: boolean - chapAuthSession: - description: chapAuthSession defines whether - support iSCSI Session CHAP authentication - type: boolean - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi - type: string - initiatorName: - description: |- - initiatorName is the custom iSCSI Initiator Name. - If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface - : will be created for the connection. - type: string - iqn: - description: iqn is the target iSCSI Qualified - Name. - type: string - iscsiInterface: - default: default - description: |- - iscsiInterface is the interface Name that uses an iSCSI transport. - Defaults to 'default' (tcp). - type: string - lun: - description: lun represents iSCSI Target Lun - number. - format: int32 - type: integer - portals: - description: |- - portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - items: - type: string - type: array - x-kubernetes-list-type: atomic - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. 
- Defaults to false. - type: boolean - secretRef: - description: secretRef is the CHAP Secret - for iSCSI target and initiator authentication - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - targetPortal: - description: |- - targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - type: string - required: - - iqn - - lun - - targetPortal - type: object - name: - description: |- - name of the volume. - Must be a DNS_LABEL and unique within the pod. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - nfs: - description: |- - nfs represents an NFS mount on the host that shares a pod's lifetime - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - properties: - path: - description: |- - path that is exported by the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - readOnly: - description: |- - readOnly here will force the NFS export to be mounted with read-only permissions. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: boolean - server: - description: |- - server is the hostname or IP address of the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - required: - - path - - server - type: object - persistentVolumeClaim: - description: |- - persistentVolumeClaimVolumeSource represents a reference to a - PersistentVolumeClaim in the same namespace. 
- More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - properties: - claimName: - description: |- - claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - type: string - readOnly: - description: |- - readOnly Will force the ReadOnly setting in VolumeMounts. - Default false. - type: boolean - required: - - claimName - type: object - photonPersistentDisk: - description: photonPersistentDisk represents a - PhotonController persistent disk attached and - mounted on kubelets host machine - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - pdID: - description: pdID is the ID that identifies - Photon Controller persistent disk - type: string - required: - - pdID - type: object - portworxVolume: - description: portworxVolume represents a portworx - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fSType represents the filesystem type to mount - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - volumeID: - description: volumeID uniquely identifies - a Portworx volume - type: string - required: - - volumeID - type: object - projected: - description: projected items for all in one resources - secrets, configmaps, and downward API - properties: - defaultMode: - description: |- - defaultMode are the mode bits used to set permissions on created files by default. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Directories within the path are not affected by this setting. - format: int32 - type: integer - sources: - description: |- - sources is the list of volume projections. Each entry in this list - handles one source. - items: - description: |- - Projection that may be projected along with other supported volume types. - Exactly one of these fields must be set. - properties: - clusterTrustBundle: - description: |- - ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field - of ClusterTrustBundle objects in an auto-updating file. - - Alpha, gated by the ClusterTrustBundleProjection feature gate. - - ClusterTrustBundle objects can either be selected by name, or by the - combination of signer name and a label selector. - properties: - labelSelector: - description: |- - Select all ClusterTrustBundles that match this label selector. Only has - effect if signerName is set. Mutually-exclusive with name. If unset, - interpreted as "match nothing". If set but empty, interpreted as "match - everything". - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. 
This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - name: - description: |- - Select a single ClusterTrustBundle by object name. Mutually-exclusive - with signerName and labelSelector. - type: string - optional: - description: |- - If true, don't block pod startup if the referenced ClusterTrustBundle(s) - aren't available. If using name, then the named ClusterTrustBundle is - allowed not to exist. If using signerName, then the combination of - signerName and labelSelector is allowed to match zero - ClusterTrustBundles. - type: boolean - path: - description: Relative path from - the volume root to write the bundle. - type: string - signerName: - description: |- - Select all ClusterTrustBundles that match this signer name. - Mutually-exclusive with name. The contents of all selected - ClusterTrustBundles will be unified and deduplicated. - type: string - required: - - path - type: object - configMap: - description: configMap information about - the configMap data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key - to a path within a volume. 
- properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether - the ConfigMap or its keys must - be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - downwardAPI: - description: downwardAPI information - about the downwardAPI data to project - properties: - items: - description: Items is a list of - DownwardAPIVolume file - items: - description: DownwardAPIVolumeFile - represents information to create - the file containing the pod - field - properties: - fieldRef: - description: 'Required: Selects - a field of the pod: only - annotations, labels, name, - namespace and uid are supported.' - properties: - apiVersion: - description: Version of - the schema the FieldPath - is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the - field to select in the - specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path - is the relative path name - of the file to be created. - Must not be absolute or - contain the ''..'' path. - Must be utf-8 encoded. The - first item of the relative - path must not start with - ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container - name: required for volumes, - optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies - the output format of - the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: - resource to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - secret: - description: secret information about - the secret data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. 
- items: - description: Maps a string key - to a path within a volume. - properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional field specify - whether the Secret or its key - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - serviceAccountToken: - description: serviceAccountToken is - information about the serviceAccountToken - data to project - properties: - audience: - description: |- - audience is the intended audience of the token. A recipient of a token - must identify itself with an identifier specified in the audience of the - token, and otherwise should reject the token. The audience defaults to the - identifier of the apiserver. - type: string - expirationSeconds: - description: |- - expirationSeconds is the requested duration of validity of the service - account token. 
As the token approaches expiration, the kubelet volume - plugin will proactively rotate the service account token. The kubelet will - start trying to rotate the token if the token is older than 80 percent of - its time to live or if the token is older than 24 hours.Defaults to 1 hour - and must be at least 10 minutes. - format: int64 - type: integer - path: - description: |- - path is the path relative to the mount point of the file to project the - token into. - type: string - required: - - path - type: object - type: object - type: array - x-kubernetes-list-type: atomic - type: object - quobyte: - description: quobyte represents a Quobyte mount - on the host that shares a pod's lifetime - properties: - group: - description: |- - group to map volume access to - Default is no group - type: string - readOnly: - description: |- - readOnly here will force the Quobyte volume to be mounted with read-only permissions. - Defaults to false. - type: boolean - registry: - description: |- - registry represents a single or multiple Quobyte Registry services - specified as a string as host:port pair (multiple entries are separated with commas) - which acts as the central registry for volumes - type: string - tenant: - description: |- - tenant owning the given Quobyte volume in the Backend - Used with dynamically provisioned Quobyte volumes, value is set by the plugin - type: string - user: - description: |- - user to map volume access to - Defaults to serivceaccount user - type: string - volume: - description: volume is a string that references - an already created Quobyte volume by name. - type: string - required: - - registry - - volume - type: object - rbd: - description: |- - rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/rbd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. 
- Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd - type: string - image: - description: |- - image is the rados image name. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - keyring: - default: /etc/ceph/keyring - description: |- - keyring is the path to key ring for RBDUser. - Default is /etc/ceph/keyring. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - monitors: - description: |- - monitors is a collection of Ceph monitors. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - pool: - default: rbd - description: |- - pool is the rados pool name. - Default is rbd. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: boolean - secretRef: - description: |- - secretRef is name of the authentication secret for RBDUser. If provided - overrides keyring. - Default is nil. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - default: admin - description: |- - user is the rados user name. - Default is admin. 
- More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - required: - - image - - monitors - type: object - scaleIO: - description: scaleIO represents a ScaleIO persistent - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - default: xfs - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". - Default is "xfs". - type: string - gateway: - description: gateway is the host address of - the ScaleIO API Gateway. - type: string - protectionDomain: - description: protectionDomain is the name - of the ScaleIO Protection Domain for the - configured storage. - type: string - readOnly: - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef references to the secret for ScaleIO user and other - sensitive information. If this is not provided, Login operation will fail. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - sslEnabled: - description: sslEnabled Flag enable/disable - SSL communication with Gateway, default - false - type: boolean - storageMode: - default: ThinProvisioned - description: |- - storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. - Default is ThinProvisioned. - type: string - storagePool: - description: storagePool is the ScaleIO Storage - Pool associated with the protection domain. 
- type: string - system: - description: system is the name of the storage - system as configured in ScaleIO. - type: string - volumeName: - description: |- - volumeName is the name of a volume already created in the ScaleIO system - that is associated with this volume source. - type: string - required: - - gateway - - secretRef - - system - type: object - secret: - description: |- - secret represents a secret that should populate this volume. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - properties: - defaultMode: - description: |- - defaultMode is Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values - for mode bits. Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items If unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. 
- type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - optional: - description: optional field specify whether - the Secret or its keys must be defined - type: boolean - secretName: - description: |- - secretName is the name of the secret in the pod's namespace to use. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - type: string - type: object - storageos: - description: storageOS represents a StorageOS - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef specifies the secret to use for obtaining the StorageOS API - credentials. If not specified, default values will be attempted. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeName: - description: |- - volumeName is the human-readable name of the StorageOS volume. Volume - names are only unique within a namespace. - type: string - volumeNamespace: - description: |- - volumeNamespace specifies the scope of the volume within StorageOS. If no - namespace is specified then the Pod's namespace will be used. This allows the - Kubernetes name scoping to be mirrored within StorageOS for tighter integration. 
- Set VolumeName to any name to override the default behaviour. - Set to "default" if you are not using namespaces within StorageOS. - type: string - type: object - vsphereVolume: - description: vsphereVolume represents a vSphere - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fsType is filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - storagePolicyID: - description: storagePolicyID is the storage - Policy Based Management (SPBM) profile ID - associated with the StoragePolicyName. - type: string - storagePolicyName: - description: storagePolicyName is the storage - Policy Based Management (SPBM) profile name. - type: string - volumePath: - description: volumePath is the path that identifies - vSphere volume vmdk - type: string - required: - - volumePath - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - required: - - containers - type: object - type: object - type: object - description: |- - A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. - For example, - { - "PS": ReplicaSpec, - "Worker": ReplicaSpec, - } - type: object - required: - - tfReplicaSpecs - type: object - status: - description: |- - Most recently observed status of the TFJob. - Populated by the system. - Read-only. - properties: - completionTime: - description: |- - Represents time when the job was completed. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - conditions: - description: Conditions is an array of current observed job conditions. - items: - description: JobCondition describes the state of the job at a certain - point. 
- properties: - lastTransitionTime: - description: Last time the condition transitioned from one status - to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human readable message indicating details about - the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Type of job condition. - type: string - required: - - status - - type - type: object - type: array - lastReconcileTime: - description: |- - Represents last time when the job was reconciled. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - replicaStatuses: - additionalProperties: - description: ReplicaStatus represents the current observed state - of the replica. - properties: - active: - description: The number of actively running pods. - format: int32 - type: integer - failed: - description: The number of pods which reached phase Failed. - format: int32 - type: integer - labelSelector: - description: 'Deprecated: Use Selector instead' - properties: - matchExpressions: - description: matchExpressions is a list of label selector - requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. 
If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - selector: - description: |- - A Selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty Selector matches all objects. A null - Selector matches no objects. - type: string - succeeded: - description: The number of pods which reached phase Succeeded. - format: int32 - type: integer - type: object - description: |- - ReplicaStatuses is map of ReplicaType and ReplicaStatus, - specifies the status of each replica. - type: object - startTime: - description: |- - Represents time when the job was acknowledged by the job controller. - It is not guaranteed to be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. 
- format: date-time - type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} diff --git a/manifests/base/crds/kubeflow.org_xgboostjobs.yaml b/manifests/base/crds/kubeflow.org_xgboostjobs.yaml deleted file mode 100644 index accb08a4a2..0000000000 --- a/manifests/base/crds/kubeflow.org_xgboostjobs.yaml +++ /dev/null @@ -1,7889 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.5 - name: xgboostjobs.kubeflow.org -spec: - group: kubeflow.org - names: - kind: XGBoostJob - listKind: XGBoostJobList - plural: xgboostjobs - singular: xgboostjob - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .status.conditions[-1:].type - name: State - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1 - schema: - openAPIV3Schema: - description: XGBoostJob is the Schema for the xgboostjobs API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: XGBoostJobSpec defines the desired state of XGBoostJob - properties: - runPolicy: - description: |- - INSERT ADDITIONAL SPEC FIELDS - desired state of cluster - Important: Run "make" to regenerate code after modifying this file - properties: - activeDeadlineSeconds: - description: |- - Specifies the duration in seconds relative to the startTime that the job may be active - before the system tries to terminate it; value must be positive integer. - format: int64 - type: integer - backoffLimit: - description: Optional number of retries before marking this job - failed. - format: int32 - type: integer - cleanPodPolicy: - description: |- - CleanPodPolicy defines the policy to kill pods after the job completes. - Default to None. - type: string - managedBy: - description: |- - ManagedBy is used to indicate the controller or entity that manages a job. - The value must be either an empty, 'kubeflow.org/training-operator' or - 'kueue.x-k8s.io/multikueue'. - The training-operator reconciles a job which doesn't have this - field at all or the field value is the reserved string - 'kubeflow.org/training-operator', but delegates reconciling the job - with 'kueue.x-k8s. - type: string - schedulingPolicy: - description: SchedulingPolicy defines the policy related to scheduling, - e.g. 
gang-scheduling - properties: - minAvailable: - format: int32 - type: integer - minResources: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - priorityClass: - type: string - queue: - type: string - x-kubernetes-validations: - - message: spec.runPolicy.schedulingPolicy.queue is immutable - rule: self == oldSelf - scheduleTimeoutSeconds: - format: int32 - type: integer - type: object - suspend: - default: false - description: |- - suspend specifies whether the Job controller should create Pods or not. - If a Job is created with suspend set to true, no Pods are created by - the Job controller. If a Job is suspended after creation (i.e. the - flag goes from false to true), the Job controller will delete all - active Pods and PodGroups associated with this Job. - Users must design their workload to gracefully handle this. - type: boolean - ttlSecondsAfterFinished: - description: |- - TTLSecondsAfterFinished is the TTL to clean up jobs. - It may take extra ReconcilePeriod seconds for the cleanup, since - reconcile gets called periodically. - Default to infinite. - format: int32 - type: integer - type: object - xgbReplicaSpecs: - additionalProperties: - description: ReplicaSpec is a description of the replica - properties: - replicas: - description: |- - Replicas is the desired number of replicas of the given template. - If unspecified, defaults to 1. - format: int32 - type: integer - restartPolicy: - description: |- - Restart policy for all replicas within the job. - One of Always, OnFailure, Never and ExitCode. - Default to Never. - type: string - template: - description: |- - Template is the object that describes the pod that - will be created for this replica. 
RestartPolicy in PodTemplateSpec - will be overide by RestartPolicy in ReplicaSpec - properties: - metadata: - description: |- - Standard object's metadata. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - Specification of the desired behavior of the pod. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status - properties: - activeDeadlineSeconds: - description: |- - Optional duration in seconds the pod may be active on the node relative to - StartTime before the system will actively try to mark it failed and kill associated containers. - Value must be a positive integer. - format: int64 - type: integer - affinity: - description: If specified, the pod's scheduling constraints - properties: - nodeAffinity: - description: Describes node affinity scheduling - rules for the pod. - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: |- - An empty preferred scheduling term matches all objects with implicit weight 0 - (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). 
- properties: - preference: - description: A node selector term, associated - with the corresponding weight. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. 
- This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - weight: - description: Weight associated with matching - the corresponding nodeSelectorTerm, - in the range 1-100. - format: int32 - type: integer - required: - - preference - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to an update), the system - may or may not try to eventually evict the pod from its node. - properties: - nodeSelectorTerms: - description: Required. A list of node selector - terms. The terms are ORed. - items: - description: |- - A null or empty node selector term matches no objects. The requirements of - them are ANDed. - The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. - properties: - matchExpressions: - description: A list of node selector - requirements by node's labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. 
If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector - requirements by node's fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that - the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-type: atomic - required: - - nodeSelectorTerms - type: object - x-kubernetes-map-type: atomic - type: object - podAffinity: - description: Describes pod affinity scheduling rules - (e.g. co-locate this pod in the same node, zone, - etc. as some other pod(s)). 
- properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - for each node that meets all of the scheduling requirements (resource - request, requiredDuringScheduling affinity expressions, etc. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. 
- properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. 
- items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. 
The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - podAntiAffinity: - description: Describes pod anti-affinity scheduling - rules (e.g. avoid putting this pod in the same - node, zone, etc. as some other pod(s)). - properties: - preferredDuringSchedulingIgnoredDuringExecution: - description: |- - The scheduler will prefer to schedule pods to nodes that satisfy - the anti-affinity expressions specified by this field, but it may choose - a node that violates one or more of the expressions. The node that is - most preferred is the one with the greatest sum of weights, i.e. - items: - description: The weights of all of the matched - WeightedPodAffinityTerm fields are added - per-node to find the most preferred node(s) - properties: - podAffinityTerm: - description: Required. A pod affinity - term, associated with the corresponding - weight. 
- properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - weight: - description: |- - weight associated with matching the corresponding podAffinityTerm, - in the range 1-100. - format: int32 - type: integer - required: - - podAffinityTerm - - weight - type: object - type: array - x-kubernetes-list-type: atomic - requiredDuringSchedulingIgnoredDuringExecution: - description: |- - If the anti-affinity requirements specified by this field are not met at - scheduling time, the pod will not be scheduled onto the node. - If the anti-affinity requirements specified by this field cease to be met - at some point during pod execution (e.g. 
due to a pod label update), the - system may or may not try to eventually evict the pod from its node. - items: - description: |- - Defines a set of pods (namely those matching the labelSelector - relative to the given namespace(s)) that this pod should be - co-located (affinity) or not co-located (anti-affinity) with, - where co-located is defined as running on a node whose value of - the label with key matches that of any node on which - a pod of the set of pods is running - properties: - labelSelector: - description: |- - A label query over a set of resources, in this case pods. - If it's null, this PodAffinityTerm matches with no Pods. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. 
- type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - mismatchLabelKeys: - description: |- - MismatchLabelKeys is a set of pod label keys to select which pods will - be taken into consideration. The keys are used to lookup values from the - incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` - to select the group of existing pods which pods will be taken into consideration - for the incoming pod's pod (anti) affinity. - items: - type: string - type: array - x-kubernetes-list-type: atomic - namespaceSelector: - description: |- - A label query over the set of namespaces that the term applies to. - The term is applied to the union of the namespaces selected by this field - and the ones listed in the namespaces field. - null selector and null or empty namespaces list means "this pod's namespace". - An empty selector ({}) matches all namespaces. - properties: - matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label - key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. 
- type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - namespaces: - description: |- - namespaces specifies a static list of namespace names that the term applies to. - The term is applied to the union of the namespaces listed in this field - and the ones selected by namespaceSelector. - null or empty namespaces list and null namespaceSelector means "this pod's namespace". - items: - type: string - type: array - x-kubernetes-list-type: atomic - topologyKey: - description: |- - This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching - the labelSelector in the specified namespaces, where co-located is defined as running on a node - whose value of the label with key topologyKey matches that of any node on which any of the - selected pods is running. - Empty topologyKey is not allowed. - type: string - required: - - topologyKey - type: object - type: array - x-kubernetes-list-type: atomic - type: object - type: object - automountServiceAccountToken: - description: AutomountServiceAccountToken indicates - whether a service account token should be automatically - mounted. 
- type: boolean - containers: - description: |- - List of containers belonging to the pod. - Containers cannot currently be added or removed. - There must be at least one container in a Pod. - Cannot be updated. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". 
- type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. 
- More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. 
- format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. 
- format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. 
- format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. 
- For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. 
- x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. 
- type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - dnsConfig: - description: |- - Specifies the DNS parameters of a pod. - Parameters specified here will be merged to the generated DNS - configuration based on DNSPolicy. - properties: - nameservers: - description: |- - A list of DNS name server IP addresses. - This will be appended to the base nameservers generated from DNSPolicy. - Duplicated nameservers will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - options: - description: |- - A list of DNS resolver options. - This will be merged with the base options generated from DNSPolicy. - Duplicated entries will be removed. Resolution options given in Options - will override those that appear in the base DNSPolicy. 
- items: - description: PodDNSConfigOption defines DNS resolver - options of a pod. - properties: - name: - description: Required. - type: string - value: - type: string - type: object - type: array - x-kubernetes-list-type: atomic - searches: - description: |- - A list of DNS search domains for host-name lookup. - This will be appended to the base search paths generated from DNSPolicy. - Duplicated search paths will be removed. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - dnsPolicy: - description: |- - Set DNS policy for the pod. - Defaults to "ClusterFirst". - Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. - DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. - To have DNS options set along with hostNetwork, you have to specify DNS policy - explicitly to 'ClusterFirstWithHostNet'. - type: string - enableServiceLinks: - description: |- - EnableServiceLinks indicates whether information about services should be injected into pod's - environment variables, matching the syntax of Docker links. - Optional: Defaults to true. - type: boolean - ephemeralContainers: - description: |- - List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing - pod to perform user-initiated actions such as debugging. This list cannot be specified when - creating a pod, and it cannot be modified by updating the pod spec. In order to add an - ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. - items: - description: |- - An EphemeralContainer is a temporary container that you may add to an existing Pod for - user-initiated activities such as debugging. Ephemeral containers have no resource or - scheduling guarantees, and they will not be restarted when they exit or when a Pod is - removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the - Pod to exceed its resource allocation. 
- properties: - args: - description: |- - Arguments to the entrypoint. - The image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will - produce the string literal "$(VAR_NAME)". - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. 
- type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
- properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. 
- items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: Lifecycle is not allowed for ephemeral - containers. 
- properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. 
- Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. 
- Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. 
- properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. 
- properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the ephemeral container specified as a DNS_LABEL. - This name must be unique among all containers, init containers and ephemeral containers. - type: string - ports: - description: Ports are not allowed for ephemeral - containers. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. 
Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. 
You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' 
- type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources - already allocated to the pod. - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. 
- - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. - type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - Restart policy for the container to manage the restart behavior of each - container within a pod. 
- This may only be set for init containers. You cannot set this field on - ephemeral containers. - type: string - securityContext: - description: |- - Optional: SecurityContext defines the security options the ephemeral container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: Probes are not allowed for ephemeral - containers. - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - targetContainerName: - description: |- - If set, the name of the container from PodSpec that this ephemeral container targets. - The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. - If not set then the ephemeral container uses the namespaces configured in the Pod spec. - - The container runtime must implement support for this feature. - type: string - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. 
- type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). 
- type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. - - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - hostAliases: - description: |- - HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts - file if specified. - items: - description: |- - HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the - pod's hosts file. 
- properties: - hostnames: - description: Hostnames for the above IP address. - items: - type: string - type: array - x-kubernetes-list-type: atomic - ip: - description: IP address of the host file entry. - type: string - required: - - ip - type: object - type: array - x-kubernetes-list-map-keys: - - ip - x-kubernetes-list-type: map - hostIPC: - description: |- - Use the host's ipc namespace. - Optional: Default to false. - type: boolean - hostNetwork: - description: |- - Host networking requested for this pod. Use the host's network namespace. - If this option is set, the ports that will be used must be specified. - Default to false. - type: boolean - hostPID: - description: |- - Use the host's pid namespace. - Optional: Default to false. - type: boolean - hostUsers: - description: |- - Use the host's user namespace. - Optional: Default to true. - If set to true or not present, the pod will be run in the host user namespace, useful - for when the pod needs a feature only available to the host user namespace, such as - loading a kernel module with CAP_SYS_MODULE. - When set to false, a new userns is created for the pod. - type: boolean - hostname: - description: |- - Specifies the hostname of the Pod - If not specified, the pod's hostname will be set to a system-defined value. - type: string - imagePullSecrets: - description: |- - ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. - If specified, these secrets will be passed to individual puller implementations for them to use. - More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod - items: - description: |- - LocalObjectReference contains enough information to let you locate the - referenced object inside the same namespace. - properties: - name: - default: "" - description: |- - Name of the referent. 
- This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - initContainers: - description: |- - List of initialization containers belonging to the pod. - Init containers are executed in order prior to containers being started. If any - init container fails, the pod is considered to have failed and is handled according - to its restartPolicy. The name for an init container or normal container must be - unique among all containers. - items: - description: A single application container that you - want to run within a pod. - properties: - args: - description: |- - Arguments to the entrypoint. - The container image's CMD is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - command: - description: |- - Entrypoint array. Not executed within a shell. - The container image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's environment. If a variable - cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - items: - type: string - type: array - x-kubernetes-list-type: atomic - env: - description: |- - List of environment variables to set in the container. - Cannot be updated. 
- items: - description: EnvVar represents an environment - variable present in a Container. - properties: - name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. - type: string - value: - description: |- - Variable references $(VAR_NAME) are expanded - using the previously defined environment variables in the container and - any service environment variables. If a variable cannot be resolved, - the reference in the input string will be unchanged. Double $$ are reduced - to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. - "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". - type: string - valueFrom: - description: Source for the environment - variable's value. Cannot be used if value - is not empty. - properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. - properties: - key: - description: The key to select. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - fieldRef: - description: |- - Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, - spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to - select in the specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - secretKeyRef: - description: Selects a key of a secret - in the pod's namespace - properties: - key: - description: The key of the secret - to select from. Must be a valid - secret key. - type: string - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the - Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - envFrom: - description: |- - List of sources to populate environment variables in the container. - The keys defined within a source must be a C_IDENTIFIER. 
All invalid keys - will be reported as an event when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take precedence. - Values defined by an Env with a duplicate key will take precedence. - Cannot be updated. - items: - description: EnvFromSource represents the source - of a set of ConfigMaps - properties: - configMapRef: - description: The ConfigMap to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the ConfigMap - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be - a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: Specify whether the Secret - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - type: object - type: array - x-kubernetes-list-type: atomic - image: - description: |- - Container image name. - More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management to default or override - container images in workload controllers like Deployments and StatefulSets. 
- type: string - imagePullPolicy: - description: |- - Image pull policy. - One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/containers/images#updating-images - type: string - lifecycle: - description: |- - Actions that the management system should take in response to container lifecycle events. - Cannot be updated. - properties: - postStart: - description: |- - PostStart is called immediately after a container is created. If the handler fails, - the container is terminated and restarted according to its restart policy. - Other management of the container blocks until the hook completes. - More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: |- - PreStop is called immediately before a container is terminated due to an - API request or management event such as liveness/startup probe failure, - preemption, resource contention, etc. The handler is not called if the - container crashes or exits. 
The Pod's termination grace period countdown begins before the - PreStop hook is executed. - properties: - exec: - description: Exec specifies the action - to take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - httpGet: - description: HTTPGet specifies the http - request to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set - in the request. HTTP allows repeated - headers. - items: - description: HTTPHeader describes - a custom header to be used in - HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field - value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the - HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. 
- type: string - required: - - port - type: object - sleep: - description: Sleep represents the duration - that the container should sleep before - being terminated. - properties: - seconds: - description: Seconds is the number - of seconds to sleep. - format: int64 - type: integer - required: - - seconds - type: object - tcpSocket: - description: |- - Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept - for the backward compatibility. There are no validation of this field and - lifecycle hooks will fail in runtime when tcp handler is specified. - properties: - host: - description: 'Optional: Host name - to connect to, defaults to the pod - IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: |- - Periodic probe of container liveness. - Container will be restarted if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. 
Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. 
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. - The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - name: - description: |- - Name of the container specified as a DNS_LABEL. - Each container in a pod must have a unique name (DNS_LABEL). - Cannot be updated. - type: string - ports: - description: |- - List of ports to expose from the container. 
Not specifying a port here - DOES NOT prevent that port from being exposed. Any port which is - listening on the default "0.0.0.0" address inside a container will be - accessible from the network. - Modifying this array with strategic merge patch may corrupt the data. - For more information See https://github.com/kubernetes/kubernetes/issues/108255. - items: - description: ContainerPort represents a network - port in a single container. - properties: - containerPort: - description: |- - Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. - format: int32 - type: integer - hostIP: - description: What host IP to bind the external - port to. - type: string - hostPort: - description: |- - Number of port to expose on the host. - If specified, this must be a valid port number, 0 < x < 65536. - If HostNetwork is specified, this must match ContainerPort. - Most containers do not need this. - format: int32 - type: integer - name: - description: |- - If specified, this must be an IANA_SVC_NAME and unique within the pod. Each - named port in a pod must have a unique name. Name for the port that can be - referred to by services. - type: string - protocol: - default: TCP - description: |- - Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - x-kubernetes-list-map-keys: - - containerPort - - protocol - x-kubernetes-list-type: map - readinessProbe: - description: |- - Periodic probe of container service readiness. - Container will be removed from service endpoints if the probe fails. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - properties: - exec: - description: Exec specifies the action to - take. 
- properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. - This will be canonicalized upon output, so case-variant names will be understood as the same header. 
- type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - resizePolicy: - description: Resources resize policy for the container. - items: - description: ContainerResizePolicy represents - resource resize policy for the container. - properties: - resourceName: - description: |- - Name of the resource to which this resource resize policy applies. - Supported values: cpu, memory. - type: string - restartPolicy: - description: |- - Restart policy to apply when specified resource is resized. - If not specified, it defaults to NotRequired. - type: string - required: - - resourceName - - restartPolicy - type: object - type: array - x-kubernetes-list-type: atomic - resources: - description: |- - Compute Resources required by this container. - Cannot be updated. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - properties: - claims: - description: |- - Claims lists the names of resources, defined in spec.resourceClaims, - that are used by this container. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. It can only be set for containers. - items: - description: ResourceClaim references one - entry in PodSpec.ResourceClaims. - properties: - name: - description: |- - Name must match the name of one entry in pod.spec.resourceClaims of - the Pod where this field is used. It makes that resource available - inside a container. 
- type: string - request: - description: |- - Request is the name chosen for a request in the referenced claim. - If empty, everything from the claim is made available, otherwise - only the result of this request. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - restartPolicy: - description: |- - RestartPolicy defines the restart behavior of individual containers in a pod. - This field may only be set for init containers, and the only allowed value is "Always". - For non-init containers or when this field is not specified, - the restart behavior is defined by the Pod's restart policy and the container type. - type: string - securityContext: - description: |- - SecurityContext defines the security options the container should be run with. - If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. 
- More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ - properties: - allowPrivilegeEscalation: - description: |- - AllowPrivilegeEscalation controls whether a process can gain more - privileges than its parent process. This bool directly controls if - the no_new_privs flag will be set on the container process. - AllowPrivilegeEscalation is true always when the container is: - 1) run as Privileged - 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows. - type: boolean - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by this container. If set, this profile - overrides the pod's appArmorProfile. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - capabilities: - description: |- - The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container runtime. - Note that this field cannot be set when spec.os.name is windows. 
- properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - drop: - description: Removed capabilities - items: - description: Capability represent POSIX - capabilities type - type: string - type: array - x-kubernetes-list-type: atomic - type: object - privileged: - description: |- - Run container in privileged mode. - Processes in privileged containers are essentially equivalent to root on the host. - Defaults to false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - procMount: - description: |- - procMount denotes the type of proc mount to use for the containers. - The default value is Default which uses the container runtime defaults for - readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: |- - Whether this container has a read-only root filesystem. - Default is false. - Note that this field cannot be set when spec.os.name is windows. - type: boolean - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in PodSecurityContext. 
- type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label - that applies to the container. - type: string - role: - description: Role is a SELinux role label - that applies to the container. - type: string - type: - description: Type is a SELinux type label - that applies to the container. - type: string - user: - description: User is a SELinux user label - that applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by this container. If seccomp options are - provided at both the pod & container level, the container options - override the pod options. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. 
- Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is - the name of the GMSA credential spec - to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. - All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - startupProbe: - description: |- - StartupProbe indicates that the Pod has successfully initialized. - If specified, no other probes are executed until this completes successfully. 
- If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. - properties: - exec: - description: Exec specifies the action to - take. - properties: - command: - description: |- - Command is the command line to execute inside the container, the working directory for the - command is root ('/') in the container's filesystem. The command is simply exec'd, it is - not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use - a shell, you need to explicitly call out to that shell. - Exit status of 0 is treated as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - failureThreshold: - description: |- - Minimum consecutive failures for the probe to be considered failed after having succeeded. - Defaults to 3. Minimum value is 1. - format: int32 - type: integer - grpc: - description: GRPC specifies an action involving - a GRPC port. - properties: - port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. - format: int32 - type: integer - service: - default: "" - description: |- - Service is the name of the service to place in the gRPC HealthCheckRequest - (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - If this is not specified, the default behavior is defined by gRPC. - type: string - required: - - port - type: object - httpGet: - description: HTTPGet specifies the http request - to perform. - properties: - host: - description: |- - Host name to connect to, defaults to the pod IP. You probably want to set - "Host" in httpHeaders instead. - type: string - httpHeaders: - description: Custom headers to set in - the request. HTTP allows repeated headers. - items: - description: HTTPHeader describes a - custom header to be used in HTTP probes - properties: - name: - description: |- - The header field name. 
- This will be canonicalized upon output, so case-variant names will be understood as the same header. - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - path: - description: Path to access on the HTTP - server. - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: |- - Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: |- - Number of seconds after the container has started before liveness probes are initiated. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - periodSeconds: - description: |- - How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: |- - Minimum consecutive successes for the probe to be considered successful after having failed. - Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. - format: int32 - type: integer - tcpSocket: - description: TCPSocket specifies an action - involving a TCP port. - properties: - host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Number or name of the port to access on the container. - Number must be in the range 1 to 65535. - Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
- The grace period is the duration in seconds after the processes running in the pod are sent - a termination signal and the time when the processes are forcibly halted with a kill signal. - Set this value longer than the expected cleanup time for your process. - format: int64 - type: integer - timeoutSeconds: - description: |- - Number of seconds after which the probe times out. - Defaults to 1 second. Minimum value is 1. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes - format: int32 - type: integer - type: object - stdin: - description: |- - Whether this container should allocate a buffer for stdin in the container runtime. If this - is not set, reads from stdin in the container will always result in EOF. - Default is false. - type: boolean - stdinOnce: - description: |- - Whether the container runtime should close the stdin channel after it has been opened by - a single attach. When stdin is true the stdin stream will remain open across multiple attach - sessions. - type: boolean - terminationMessagePath: - description: |- - Optional: Path at which the file to which the container's termination message - will be written is mounted into the container's filesystem. - Message written is intended to be brief final status, such as an assertion failure message. - Will be truncated by the node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. - Defaults to /dev/termination-log. - type: string - terminationMessagePolicy: - description: |- - Indicate how the termination message should be populated. File will use the contents of - terminationMessagePath to populate the container status message on both success and failure. - FallbackToLogsOnError will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. 
- type: string - tty: - description: |- - Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. - Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block - devices to be used by the container. - items: - description: volumeDevice describes a mapping - of a raw block device within a container. - properties: - devicePath: - description: devicePath is the path inside - of the container that the device will - be mapped to. - type: string - name: - description: name must match the name of - a persistentVolumeClaim in the pod - type: string - required: - - devicePath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - devicePath - x-kubernetes-list-type: map - volumeMounts: - description: |- - Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting - of a Volume within a container. - properties: - mountPath: - description: |- - Path within the container at which the volume should be mounted. Must - not contain ':'. - type: string - mountPropagation: - description: |- - mountPropagation determines how mounts are propagated from the host - to container and the other way around. - When not set, MountPropagationNone is used. - This field is beta in 1.10. - When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified - (which defaults to None). - type: string - name: - description: This must match the Name of - a Volume. - type: string - readOnly: - description: |- - Mounted read-only if true, read-write otherwise (false or unspecified). - Defaults to false. - type: boolean - recursiveReadOnly: - description: |- - RecursiveReadOnly specifies whether read-only mounts should be handled - recursively. - - If ReadOnly is false, this field has no meaning and must be unspecified. 
- - If ReadOnly is true, and this field is set to Disabled, the mount is not made - recursively read-only. If this field is set to IfPossible, the mount is made - recursively read-only, if it is supported by the container runtime. - type: string - subPath: - description: |- - Path within the volume from which the container's volume should be mounted. - Defaults to "" (volume's root). - type: string - subPathExpr: - description: |- - Expanded path within the volume from which the container's volume should be mounted. - Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. - Defaults to "" (volume's root). - SubPathExpr and SubPath are mutually exclusive. - type: string - required: - - mountPath - - name - type: object - type: array - x-kubernetes-list-map-keys: - - mountPath - x-kubernetes-list-type: map - workingDir: - description: |- - Container's working directory. - If not specified, the container runtime's default will be used, which - might be configured in the container image. - Cannot be updated. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - nodeName: - description: |- - NodeName indicates in which node this pod is scheduled. - If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName. - Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod. - This field should not be used to express a desire for the pod to be scheduled on a specific node. - https://kubernetes. - type: string - nodeSelector: - additionalProperties: - type: string - description: |- - NodeSelector is a selector which must be true for the pod to fit on a node. - Selector which must match a node's labels for the pod to be scheduled on that node. 
- More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ - type: object - x-kubernetes-map-type: atomic - os: - description: |- - Specifies the OS of the containers in the pod. - Some pod and container fields are restricted if this is set. - - If the OS field is set to linux, the following fields must be unset: - -securityContext.windowsOptions - - If the OS field is set to windows, following fields must be unset: - - spec.hostPID - - spec.hostIPC - - spec.hostUsers - - spec.securityContext.appArmorProfile - - spec.securityContext. - properties: - name: - description: |- - Name is the name of the operating system. The currently supported values are linux and windows. - Additional value may be defined in future and can be one of: - https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration - Clients should expect to handle additional values and treat unrecognized values in this field as os: null - type: string - required: - - name - type: object - overhead: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. - This field will be autopopulated at admission time by the RuntimeClass admission controller. If - the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. - The RuntimeClass admission controller will reject Pod create requests which have the overhead already - set. - type: object - preemptionPolicy: - description: |- - PreemptionPolicy is the Policy for preempting pods with lower priority. - One of Never, PreemptLowerPriority. - Defaults to PreemptLowerPriority if unset. - type: string - priority: - description: |- - The priority value. 
Various system components use this field to find the - priority of the pod. When Priority Admission Controller is enabled, it - prevents users from setting this field. The admission controller populates - this field from PriorityClassName. - The higher the value, the higher the priority. - format: int32 - type: integer - priorityClassName: - description: |- - If specified, indicates the pod's priority. "system-node-critical" and - "system-cluster-critical" are two special keywords which indicate the - highest priorities with the former being the highest priority. Any other - name must be defined by creating a PriorityClass object with that name. - If not specified, the pod priority will be default or zero if there is no - default. - type: string - readinessGates: - description: |- - If specified, all readiness gates will be evaluated for pod readiness. - A pod is ready when all its containers are ready AND - all conditions specified in the readiness gates have status equal to "True" - More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates - items: - description: PodReadinessGate contains the reference - to a pod condition - properties: - conditionType: - description: ConditionType refers to a condition - in the pod's condition list with matching type. - type: string - required: - - conditionType - type: object - type: array - x-kubernetes-list-type: atomic - resourceClaims: - description: |- - ResourceClaims defines which ResourceClaims must be allocated - and reserved before the Pod is allowed to start. The resources - will be made available to those containers which consume them - by name. - - This is an alpha field and requires enabling the - DynamicResourceAllocation feature gate. - - This field is immutable. - items: - description: |- - PodResourceClaim references exactly one ResourceClaim, either directly - or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim - for the pod. 
- - It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. - Containers that need access to the ResourceClaim reference it with this name. - properties: - name: - description: |- - Name uniquely identifies this resource claim inside the pod. - This must be a DNS_LABEL. - type: string - resourceClaimName: - description: |- - ResourceClaimName is the name of a ResourceClaim object in the same - namespace as this pod. - - Exactly one of ResourceClaimName and ResourceClaimTemplateName must - be set. - type: string - resourceClaimTemplateName: - description: |- - ResourceClaimTemplateName is the name of a ResourceClaimTemplate - object in the same namespace as this pod. - - The template will be used to create a new ResourceClaim, which will - be bound to this pod. When this pod is deleted, the ResourceClaim - will also be deleted. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - restartPolicy: - description: |- - Restart policy for all containers within the pod. - One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. - Default to Always. - More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy - type: string - runtimeClassName: - description: |- - RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used - to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. - If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an - empty definition that uses the default runtime handler. - More info: https://git.k8s. - type: string - schedulerName: - description: |- - If specified, the pod will be dispatched by specified scheduler. - If not specified, the pod will be dispatched by default scheduler. 
- type: string - schedulingGates: - description: |- - SchedulingGates is an opaque list of values that if specified will block scheduling the pod. - If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the - scheduler will not attempt to schedule the pod. - - SchedulingGates can only be set at pod creation time, and be removed only afterwards. - items: - description: PodSchedulingGate is associated to a - Pod to guard its scheduling. - properties: - name: - description: |- - Name of the scheduling gate. - Each scheduling gate must have a unique name field. - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - securityContext: - description: |- - SecurityContext holds pod-level security attributes and common container settings. - Optional: Defaults to empty. See type description for default values of each field. - properties: - appArmorProfile: - description: |- - appArmorProfile is the AppArmor options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile loaded on the node that should be used. - The profile must be preconfigured on the node to work. - Must match the loaded name of the profile. - Must be set if and only if type is "Localhost". - type: string - type: - description: |- - type indicates which kind of AppArmor profile will be applied. - Valid options are: - Localhost - a profile pre-loaded on the node. - RuntimeDefault - the container runtime's default profile. - Unconfined - no AppArmor enforcement. - type: string - required: - - type - type: object - fsGroup: - description: |- - A special supplemental group that applies to all containers in a pod. - Some volume types allow the Kubelet to change the ownership of that volume - to be owned by the pod: - - 1. The owning GID will be the FSGroup - 2. 
The setgid bit is set (new files created in the volume will be owned by FSGroup) - 3. - format: int64 - type: integer - fsGroupChangePolicy: - description: |- - fsGroupChangePolicy defines behavior of changing ownership and permission of the volume - before being exposed inside Pod. This field will only apply to - volume types which support fsGroup based ownership(and permissions). - It will have no effect on ephemeral volume types such as: secret, configmaps - and emptydir. - Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. - type: string - runAsGroup: - description: |- - The GID to run the entrypoint of the container process. - Uses runtime default if unset. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: |- - Indicates that the container must run as a non-root user. - If true, the Kubelet will validate the image at runtime to ensure that it - does not run as UID 0 (root) and fail to start the container if it does. - If unset or false, no such validation will be performed. - May also be set in SecurityContext. - type: boolean - runAsUser: - description: |- - The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in SecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence - for that container. - Note that this field cannot be set when spec.os.name is windows. - format: int64 - type: integer - seLinuxOptions: - description: |- - The SELinux context to be applied to all containers. - If unspecified, the container runtime will allocate a random SELinux context for each - container. May also be set in SecurityContext. 
If set in - both SecurityContext and PodSecurityContext, the value specified in SecurityContext - takes precedence for that container. - Note that this field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that - applies to the container. - type: string - role: - description: Role is a SELinux role label that - applies to the container. - type: string - type: - description: Type is a SELinux type label that - applies to the container. - type: string - user: - description: User is a SELinux user label that - applies to the container. - type: string - type: object - seccompProfile: - description: |- - The seccomp options to use by the containers in this pod. - Note that this field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: |- - localhostProfile indicates a profile defined in a file on the node should be used. - The profile must be preconfigured on the node to work. - Must be a descending path, relative to the kubelet's configured seccomp profile location. - Must be set if type is "Localhost". Must NOT be set for any other type. - type: string - type: - description: |- - type indicates which kind of seccomp profile will be applied. - Valid options are: - - Localhost - a profile defined in a file on the node should be used. - RuntimeDefault - the container runtime default profile should be used. - Unconfined - no profile should be applied. - type: string - required: - - type - type: object - supplementalGroups: - description: |- - A list of groups applied to the first process run in each container, in - addition to the container's primary GID and fsGroup (if specified). If - the SupplementalGroupsPolicy feature is enabled, the - supplementalGroupsPolicy field determines whether these are in addition - to or instead of any group memberships defined in the container image. 
- items: - format: int64 - type: integer - type: array - x-kubernetes-list-type: atomic - supplementalGroupsPolicy: - description: |- - Defines how supplemental groups of the first container processes are calculated. - Valid values are "Merge" and "Strict". If not specified, "Merge" is used. - (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled - and the container runtime must implement support for this feature. - Note that this field cannot be set when spec.os.name is windows. - type: string - sysctls: - description: |- - Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported - sysctls (by the container runtime) might fail to launch. - Note that this field cannot be set when spec.os.name is windows. - items: - description: Sysctl defines a kernel parameter - to be set - properties: - name: - description: Name of a property to set - type: string - value: - description: Value of a property to set - type: string - required: - - name - - value - type: object - type: array - x-kubernetes-list-type: atomic - windowsOptions: - description: |- - The Windows specific settings applied to all containers. - If unspecified, the options within a container's SecurityContext will be used. - If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. - Note that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: |- - GMSACredentialSpec is where the GMSA admission webhook - (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the - GMSA credential spec named by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. - type: string - hostProcess: - description: |- - HostProcess determines if a container should be run as a 'Host Process' container. 
- All of a Pod's containers must have the same effective HostProcess value - (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). - In addition, if HostProcess is true then HostNetwork must also be set to true. - type: boolean - runAsUserName: - description: |- - The UserName in Windows to run the entrypoint of the container process. - Defaults to the user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext takes precedence. - type: string - type: object - type: object - serviceAccount: - description: |- - DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. - Deprecated: Use serviceAccountName instead. - type: string - serviceAccountName: - description: |- - ServiceAccountName is the name of the ServiceAccount to use to run this pod. - More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ - type: string - setHostnameAsFQDN: - description: |- - If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). - In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). - type: boolean - shareProcessNamespace: - description: |- - Share a single process namespace between all of the containers in a pod. - When this is set containers will be able to view and signal processes from other containers - in the same pod, and the first process in each container will not be assigned PID 1. - HostPID and ShareProcessNamespace cannot both be set. - Optional: Default to false. - type: boolean - subdomain: - description: |- - If specified, the fully qualified Pod hostname will be "...svc.". - If not specified, the pod will not have a domainname at all. 
- type: string - terminationGracePeriodSeconds: - description: |- - Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. - Value must be non-negative integer. The value zero indicates stop immediately via - the kill signal (no opportunity to shut down). - If this value is nil, the default grace period will be used instead. - format: int64 - type: integer - tolerations: - description: If specified, the pod's tolerations. - items: - description: |- - The pod this Toleration is attached to tolerates any taint that matches - the triple using the matching operator . - properties: - effect: - description: |- - Effect indicates the taint effect to match. Empty means match all taint effects. - When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: |- - Key is the taint key that the toleration applies to. Empty means match all taint keys. - If the key is empty, operator must be Exists; this combination means to match all values and all keys. - type: string - operator: - description: |- - Operator represents a key's relationship to the value. - Valid operators are Exists and Equal. Defaults to Equal. - Exists is equivalent to wildcard for value, so that a pod can - tolerate all taints of a particular category. - type: string - tolerationSeconds: - description: |- - TolerationSeconds represents the period of time the toleration (which must be - of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, - it is not set, which means tolerate the taint forever (do not evict). Zero and - negative values will be treated as 0 (evict immediately) by the system. - format: int64 - type: integer - value: - description: |- - Value is the taint value the toleration matches to. - If the operator is Exists, the value should be empty, otherwise just a regular string. 
- type: string - type: object - type: array - x-kubernetes-list-type: atomic - topologySpreadConstraints: - description: |- - TopologySpreadConstraints describes how a group of pods ought to spread across topology - domains. Scheduler will schedule pods in a way which abides by the constraints. - All topologySpreadConstraints are ANDed. - items: - description: TopologySpreadConstraint specifies how - to spread matching pods among the given topology. - properties: - labelSelector: - description: |- - LabelSelector is used to find matching pods. - Pods that match this label selector are counted to determine the number of pods - in their corresponding topology domain. - properties: - matchExpressions: - description: matchExpressions is a list of - label selector requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that - the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". 
The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - matchLabelKeys: - description: |- - MatchLabelKeys is a set of pod label keys to select the pods over which - spreading will be calculated. The keys are used to lookup values from the - incoming pod labels, those key-value labels are ANDed with labelSelector - to select the group of existing pods over which spreading will be calculated - for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. - items: - type: string - type: array - x-kubernetes-list-type: atomic - maxSkew: - description: |- - MaxSkew describes the degree to which pods may be unevenly distributed. - When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference - between the number of matching pods in the target topology and the global minimum. - The global minimum is the minimum number of matching pods in an eligible domain - or zero if the number of eligible domains is less than MinDomains. - format: int32 - type: integer - minDomains: - description: |- - MinDomains indicates a minimum number of eligible domains. - When the number of eligible domains with matching topology keys is less than minDomains, - Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. - And when the number of eligible domains with matching topology keys equals or greater than minDomains, - this value has no effect on scheduling. - format: int32 - type: integer - nodeAffinityPolicy: - description: |- - NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector - when calculating pod topology spread skew. Options are: - - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. - - If this value is nil, the behavior is equivalent to the Honor policy. 
- type: string - nodeTaintsPolicy: - description: |- - NodeTaintsPolicy indicates how we will treat node taints when calculating - pod topology spread skew. Options are: - - Honor: nodes without taints, along with tainted nodes for which the incoming pod - has a toleration, are included. - - Ignore: node taints are ignored. All nodes are included. - - If this value is nil, the behavior is equivalent to the Ignore policy. - type: string - topologyKey: - description: |- - TopologyKey is the key of node labels. Nodes that have a label with this key - and identical values are considered to be in the same topology. - We consider each as a "bucket", and try to put balanced number - of pods into each bucket. - We define a domain as a particular instance of a topology. - type: string - whenUnsatisfiable: - description: |- - WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy - the spread constraint. - - DoNotSchedule (default) tells the scheduler not to schedule it. - - ScheduleAnyway tells the scheduler to schedule the pod in any location, - but giving higher precedence to topologies that would help reduce the - skew. - type: string - required: - - maxSkew - - topologyKey - - whenUnsatisfiable - type: object - type: array - x-kubernetes-list-map-keys: - - topologyKey - - whenUnsatisfiable - x-kubernetes-list-type: map - volumes: - description: |- - List of volumes that can be mounted by containers belonging to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes - items: - description: Volume represents a named volume in a - pod that may be accessed by any container in the - pod. - properties: - awsElasticBlockStore: - description: |- - awsElasticBlockStore represents an AWS Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - format: int32 - type: integer - readOnly: - description: |- - readOnly value true will force the readOnly setting in VolumeMounts. - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: boolean - volumeID: - description: |- - volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). - More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - type: string - required: - - volumeID - type: object - azureDisk: - description: azureDisk represents an Azure Data - Disk mount on the host and bind mount to the - pod. - properties: - cachingMode: - description: 'cachingMode is the Host Caching - mode: None, Read Only, Read Write.' - type: string - diskName: - description: diskName is the Name of the data - disk in the blob storage - type: string - diskURI: - description: diskURI is the URI of data disk - in the blob storage - type: string - fsType: - default: ext4 - description: |- - fsType is Filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
- type: string - kind: - description: 'kind expected values are Shared: - multiple blob disks per storage account Dedicated: - single blob disk per storage account Managed: - azure managed data disk (only in managed - availability set). defaults to shared' - type: string - readOnly: - default: false - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - required: - - diskName - - diskURI - type: object - azureFile: - description: azureFile represents an Azure File - Service mount on the host and bind mount to - the pod. - properties: - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretName: - description: secretName is the name of secret - that contains Azure Storage Account Name - and Key - type: string - shareName: - description: shareName is the azure share - Name - type: string - required: - - secretName - - shareName - type: object - cephfs: - description: cephFS represents a Ceph FS mount - on the host that shares a pod's lifetime - properties: - monitors: - description: |- - monitors is Required: Monitors is a collection of Ceph monitors - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - path: - description: 'path is Optional: Used as the - mounted root, rather than the full Ceph - tree, default is /' - type: string - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: boolean - secretFile: - description: |- - secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - secretRef: - description: |- - secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - description: |- - user is optional: User is the rados user name, default is admin - More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it - type: string - required: - - monitors - type: object - cinder: - description: |- - cinder represents a cinder volume attached and mounted on kubelets host machine. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: boolean - secretRef: - description: |- - secretRef is optional: points to a secret object containing parameters used to connect - to OpenStack. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeID: - description: |- - volumeID used to identify the volume in cinder. - More info: https://examples.k8s.io/mysql-cinder-pd/README.md - type: string - required: - - volumeID - type: object - configMap: - description: configMap represents a configMap - that should populate this volume - properties: - defaultMode: - description: |- - defaultMode is optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether the - ConfigMap or its keys must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - csi: - description: csi (Container Storage Interface) - represents ephemeral storage that is handled - by certain external CSI drivers (Beta feature). - properties: - driver: - description: |- - driver is the name of the CSI driver that handles this volume. - Consult with your admin for the correct name as registered in the cluster. - type: string - fsType: - description: |- - fsType to mount. Ex. "ext4", "xfs", "ntfs". - If not provided, the empty value is passed to the associated CSI driver - which will determine the default filesystem to apply. - type: string - nodePublishSecretRef: - description: |- - nodePublishSecretRef is a reference to the secret object containing - sensitive information to pass to the CSI driver to complete the CSI - NodePublishVolume and NodeUnpublishVolume calls. - This field is optional, and may be empty if no secret is required. 
If the - secret object contains more than one secret, all secret references are passed. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - readOnly: - description: |- - readOnly specifies a read-only configuration for the volume. - Defaults to false (read/write). - type: boolean - volumeAttributes: - additionalProperties: - type: string - description: |- - volumeAttributes stores driver-specific properties that are passed to the CSI - driver. Consult your driver's documentation for supported values. - type: object - required: - - driver - type: object - downwardAPI: - description: downwardAPI represents downward API - about the pod that should populate this volume - properties: - defaultMode: - description: |- - Optional: mode bits to use on created files by default. Must be a - Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: Items is a list of downward API - volume file - items: - description: DownwardAPIVolumeFile represents - information to create the file containing - the pod field - properties: - fieldRef: - description: 'Required: Selects a field - of the pod: only annotations, labels, - name, namespace and uid are supported.' - properties: - apiVersion: - description: Version of the schema - the FieldPath is written in terms - of, defaults to "v1". 
- type: string - fieldPath: - description: Path of the field to - select in the specified API version. - type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path is the - relative path name of the file to - be created. Must not be absolute or - contain the ''..'' path. Must be utf-8 - encoded. The first item of the relative - path must not start with ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container name: required - for volumes, optional for env - vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: resource - to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - emptyDir: - description: |- - emptyDir represents a temporary directory that shares a pod's lifetime. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - properties: - medium: - description: |- - medium represents what type of storage medium should back this directory. 
- The default is "" which means to use the node's default medium. - Must be an empty string (default) or Memory. - More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir - type: string - sizeLimit: - anyOf: - - type: integer - - type: string - description: |- - sizeLimit is the total amount of local storage required for this EmptyDir volume. - The size limit is also applicable for memory medium. - The maximum usage on memory medium EmptyDir would be the minimum value between - the SizeLimit specified here and the sum of memory limits of all containers in a pod. - The default is nil which means that the limit is undefined. - More info: https://kubernetes. - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - type: object - ephemeral: - description: |- - ephemeral represents a volume that is handled by a cluster storage driver. - The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, - and deleted when the pod is removed. - properties: - volumeClaimTemplate: - description: |- - Will be used to create a stand-alone PVC to provision the volume. - The pod in which this EphemeralVolumeSource is embedded will be the - owner of the PVC, i.e. the PVC will be deleted together with the - pod. The name of the PVC will be `-` where - `` is the name from the `PodSpec.Volumes` array - entry. - properties: - metadata: - description: |- - May contain labels and annotations that will be copied into the PVC - when creating it. No other fields are allowed and will be rejected during - validation. 
- properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: |- - The specification for the PersistentVolumeClaim. The entire content is - copied unchanged into the PVC that gets created from this - template. The same fields as in a PersistentVolumeClaim - are also valid here. - properties: - accessModes: - description: |- - accessModes contains the desired access modes the volume should have. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 - items: - type: string - type: array - x-kubernetes-list-type: atomic - dataSource: - description: |- - dataSource field can be used to specify either: - * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) - * An existing PVC (PersistentVolumeClaim) - If the provisioner or an external controller can support the specified data source, - it will create a new volume based on the contents of the specified data source. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - required: - - kind - - name - type: object - x-kubernetes-map-type: atomic - dataSourceRef: - description: |- - dataSourceRef specifies the object from which to populate the volume with data, if a non-empty - volume is desired. This may be any object from a non-empty API group (non - core object) or a PersistentVolumeClaim object. 
- When this field is specified, volume binding will only succeed if the type of - the specified object matches some installed volume populator or dynamic - provisioner. - properties: - apiGroup: - description: |- - APIGroup is the group for the resource being referenced. - If APIGroup is not specified, the specified Kind must be in the core API group. - For any other third-party types, APIGroup is required. - type: string - kind: - description: Kind is the type - of resource being referenced - type: string - name: - description: Name is the name - of resource being referenced - type: string - namespace: - description: |- - Namespace is the namespace of resource being referenced - Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. - (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. - type: string - required: - - kind - - name - type: object - resources: - description: |- - resources represents the minimum resources the volume should have. - If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements - that are lower than previous value but must still be higher than capacity recorded in the - status field of the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Limits describes the maximum amount of compute resources allowed. 
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: |- - Requests describes the minimum amount of compute resources required. - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, - otherwise to an implementation-defined value. Requests cannot exceed Limits. - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - type: object - type: object - selector: - description: selector is a label query - over volumes to consider for binding. - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - storageClassName: - description: |- - storageClassName is the name of the StorageClass required by the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 - type: string - volumeAttributesClassName: - description: |- - volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. - If specified, the CSI driver will create or update the volume with the attributes defined - in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. - type: string - volumeMode: - description: |- - volumeMode defines what type of volume is required by the claim. - Value of Filesystem is implied when not included in claim spec. - type: string - volumeName: - description: volumeName is the binding - reference to the PersistentVolume - backing this claim. - type: string - type: object - required: - - spec - type: object - type: object - fc: - description: fc represents a Fibre Channel resource - that is attached to a kubelet's host machine - and then exposed to the pod. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - lun: - description: 'lun is Optional: FC target lun - number' - format: int32 - type: integer - readOnly: - description: |- - readOnly is Optional: Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. 
- type: boolean - targetWWNs: - description: 'targetWWNs is Optional: FC target - worldwide names (WWNs)' - items: - type: string - type: array - x-kubernetes-list-type: atomic - wwids: - description: |- - wwids Optional: FC volume world wide identifiers (wwids) - Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. - items: - type: string - type: array - x-kubernetes-list-type: atomic - type: object - flexVolume: - description: |- - flexVolume represents a generic volume resource that is - provisioned/attached using an exec based plugin. - properties: - driver: - description: driver is the name of the driver - to use for this volume. - type: string - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. - type: string - options: - additionalProperties: - type: string - description: 'options is Optional: this field - holds extra command options if any.' - type: object - readOnly: - description: |- - readOnly is Optional: defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef is Optional: secretRef is reference to the secret object containing - sensitive information to pass to the plugin scripts. This may be - empty if no secret object is specified. If the secret object - contains more than one secret, all secrets are passed to the plugin - scripts. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - required: - - driver - type: object - flocker: - description: flocker represents a Flocker volume - attached to a kubelet's host machine. This depends - on the Flocker control service being running - properties: - datasetName: - description: |- - datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker - should be considered as deprecated - type: string - datasetUUID: - description: datasetUUID is the UUID of the - dataset. This is unique identifier of a - Flocker dataset - type: string - type: object - gcePersistentDisk: - description: |- - gcePersistentDisk represents a GCE Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - properties: - fsType: - description: |- - fsType is filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - partition: - description: |- - partition is the partition in the volume that you want to mount. - If omitted, the default is to mount by volume name. - Examples: For volume /dev/sda1, you specify the partition as "1". - Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - format: int32 - type: integer - pdName: - description: |- - pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. 
- More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - type: boolean - required: - - pdName - type: object - gitRepo: - description: |- - gitRepo represents a git repository at a particular revision. - DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an - EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir - into the Pod's container. - properties: - directory: - description: |- - directory is the target directory name. - Must not contain or start with '..'. If '.' is supplied, the volume directory will be the - git repository. Otherwise, if specified, the volume will contain the git repository in - the subdirectory with the given name. - type: string - repository: - description: repository is the URL - type: string - revision: - description: revision is the commit hash for - the specified revision. - type: string - required: - - repository - type: object - glusterfs: - description: |- - glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/glusterfs/README.md - properties: - endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - path: - description: |- - path is the Glusterfs volume path. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: string - readOnly: - description: |- - readOnly here will force the Glusterfs volume to be mounted with read-only permissions. - Defaults to false. 
- More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod - type: boolean - required: - - endpoints - - path - type: object - hostPath: - description: |- - hostPath represents a pre-existing file or directory on the host - machine that is directly exposed to the container. This is generally - used for system agents or other privileged things that are allowed - to see the host machine. Most containers will NOT need this. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - properties: - path: - description: |- - path of the directory on the host. - If the path is a symlink, it will follow the link to the real path. - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - type: - description: |- - type for HostPath Volume - Defaults to "" - More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - type: string - required: - - path - type: object - image: - description: |- - image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. - The volume is resolved at pod startup depending on which PullPolicy value is provided: - - - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - - Never: the kubelet never pulls the reference and only uses a local image or artifact. - properties: - pullPolicy: - description: |- - Policy for pulling OCI objects. Possible values are: - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. - type: string - reference: - description: |- - Required: Image or artifact reference to be used. - Behaves in the same way as pod.spec.containers[*].image. 
- Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets. - More info: https://kubernetes. - type: string - type: object - iscsi: - description: |- - iscsi represents an ISCSI Disk resource that is attached to a - kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md - properties: - chapAuthDiscovery: - description: chapAuthDiscovery defines whether - support iSCSI Discovery CHAP authentication - type: boolean - chapAuthSession: - description: chapAuthSession defines whether - support iSCSI Session CHAP authentication - type: boolean - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. - Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi - type: string - initiatorName: - description: |- - initiatorName is the custom iSCSI Initiator Name. - If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface - : will be created for the connection. - type: string - iqn: - description: iqn is the target iSCSI Qualified - Name. - type: string - iscsiInterface: - default: default - description: |- - iscsiInterface is the interface Name that uses an iSCSI transport. - Defaults to 'default' (tcp). - type: string - lun: - description: lun represents iSCSI Target Lun - number. - format: int32 - type: integer - portals: - description: |- - portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - items: - type: string - type: array - x-kubernetes-list-type: atomic - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. 
- Defaults to false. - type: boolean - secretRef: - description: secretRef is the CHAP Secret - for iSCSI target and initiator authentication - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - targetPortal: - description: |- - targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port - is other than default (typically TCP ports 860 and 3260). - type: string - required: - - iqn - - lun - - targetPortal - type: object - name: - description: |- - name of the volume. - Must be a DNS_LABEL and unique within the pod. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - nfs: - description: |- - nfs represents an NFS mount on the host that shares a pod's lifetime - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - properties: - path: - description: |- - path that is exported by the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - readOnly: - description: |- - readOnly here will force the NFS export to be mounted with read-only permissions. - Defaults to false. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: boolean - server: - description: |- - server is the hostname or IP address of the NFS server. - More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs - type: string - required: - - path - - server - type: object - persistentVolumeClaim: - description: |- - persistentVolumeClaimVolumeSource represents a reference to a - PersistentVolumeClaim in the same namespace. 
- More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - properties: - claimName: - description: |- - claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - type: string - readOnly: - description: |- - readOnly Will force the ReadOnly setting in VolumeMounts. - Default false. - type: boolean - required: - - claimName - type: object - photonPersistentDisk: - description: photonPersistentDisk represents a - PhotonController persistent disk attached and - mounted on kubelets host machine - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - pdID: - description: pdID is the ID that identifies - Photon Controller persistent disk - type: string - required: - - pdID - type: object - portworxVolume: - description: portworxVolume represents a portworx - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fSType represents the filesystem type to mount - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - volumeID: - description: volumeID uniquely identifies - a Portworx volume - type: string - required: - - volumeID - type: object - projected: - description: projected items for all in one resources - secrets, configmaps, and downward API - properties: - defaultMode: - description: |- - defaultMode are the mode bits used to set permissions on created files by default. 
- Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - Directories within the path are not affected by this setting. - format: int32 - type: integer - sources: - description: |- - sources is the list of volume projections. Each entry in this list - handles one source. - items: - description: |- - Projection that may be projected along with other supported volume types. - Exactly one of these fields must be set. - properties: - clusterTrustBundle: - description: |- - ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field - of ClusterTrustBundle objects in an auto-updating file. - - Alpha, gated by the ClusterTrustBundleProjection feature gate. - - ClusterTrustBundle objects can either be selected by name, or by the - combination of signer name and a label selector. - properties: - labelSelector: - description: |- - Select all ClusterTrustBundles that match this label selector. Only has - effect if signerName is set. Mutually-exclusive with name. If unset, - interpreted as "match nothing". If set but empty, interpreted as "match - everything". - properties: - matchExpressions: - description: matchExpressions - is a list of label selector - requirements. The requirements - are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the - label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. 
This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - name: - description: |- - Select a single ClusterTrustBundle by object name. Mutually-exclusive - with signerName and labelSelector. - type: string - optional: - description: |- - If true, don't block pod startup if the referenced ClusterTrustBundle(s) - aren't available. If using name, then the named ClusterTrustBundle is - allowed not to exist. If using signerName, then the combination of - signerName and labelSelector is allowed to match zero - ClusterTrustBundles. - type: boolean - path: - description: Relative path from - the volume root to write the bundle. - type: string - signerName: - description: |- - Select all ClusterTrustBundles that match this signer name. - Mutually-exclusive with name. The contents of all selected - ClusterTrustBundles will be unified and deduplicated. - type: string - required: - - path - type: object - configMap: - description: configMap information about - the configMap data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - ConfigMap will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key - to a path within a volume. 
- properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional specify whether - the ConfigMap or its keys must - be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - downwardAPI: - description: downwardAPI information - about the downwardAPI data to project - properties: - items: - description: Items is a list of - DownwardAPIVolume file - items: - description: DownwardAPIVolumeFile - represents information to create - the file containing the pod - field - properties: - fieldRef: - description: 'Required: Selects - a field of the pod: only - annotations, labels, name, - namespace and uid are supported.' - properties: - apiVersion: - description: Version of - the schema the FieldPath - is written in terms - of, defaults to "v1". - type: string - fieldPath: - description: Path of the - field to select in the - specified API version. 
- type: string - required: - - fieldPath - type: object - x-kubernetes-map-type: atomic - mode: - description: |- - Optional: mode bits used to set permissions on this file, must be an octal value - between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: 'Required: Path - is the relative path name - of the file to be created. - Must not be absolute or - contain the ''..'' path. - Must be utf-8 encoded. The - first item of the relative - path must not start with - ''..''' - type: string - resourceFieldRef: - description: |- - Selects a resource of the container: only resources limits and requests - (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. - properties: - containerName: - description: 'Container - name: required for volumes, - optional for env vars' - type: string - divisor: - anyOf: - - type: integer - - type: string - description: Specifies - the output format of - the exposed resources, - defaults to "1" - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - resource: - description: 'Required: - resource to select' - type: string - required: - - resource - type: object - x-kubernetes-map-type: atomic - required: - - path - type: object - type: array - x-kubernetes-list-type: atomic - type: object - secret: - description: secret information about - the secret data to project - properties: - items: - description: |- - items if unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. 
- items: - description: Maps a string key - to a path within a volume. - properties: - key: - description: key is the key - to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. - type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - optional: - description: optional field specify - whether the Secret or its key - must be defined - type: boolean - type: object - x-kubernetes-map-type: atomic - serviceAccountToken: - description: serviceAccountToken is - information about the serviceAccountToken - data to project - properties: - audience: - description: |- - audience is the intended audience of the token. A recipient of a token - must identify itself with an identifier specified in the audience of the - token, and otherwise should reject the token. The audience defaults to the - identifier of the apiserver. - type: string - expirationSeconds: - description: |- - expirationSeconds is the requested duration of validity of the service - account token. 
As the token approaches expiration, the kubelet volume - plugin will proactively rotate the service account token. The kubelet will - start trying to rotate the token if the token is older than 80 percent of - its time to live or if the token is older than 24 hours.Defaults to 1 hour - and must be at least 10 minutes. - format: int64 - type: integer - path: - description: |- - path is the path relative to the mount point of the file to project the - token into. - type: string - required: - - path - type: object - type: object - type: array - x-kubernetes-list-type: atomic - type: object - quobyte: - description: quobyte represents a Quobyte mount - on the host that shares a pod's lifetime - properties: - group: - description: |- - group to map volume access to - Default is no group - type: string - readOnly: - description: |- - readOnly here will force the Quobyte volume to be mounted with read-only permissions. - Defaults to false. - type: boolean - registry: - description: |- - registry represents a single or multiple Quobyte Registry services - specified as a string as host:port pair (multiple entries are separated with commas) - which acts as the central registry for volumes - type: string - tenant: - description: |- - tenant owning the given Quobyte volume in the Backend - Used with dynamically provisioned Quobyte volumes, value is set by the plugin - type: string - user: - description: |- - user to map volume access to - Defaults to serivceaccount user - type: string - volume: - description: volume is a string that references - an already created Quobyte volume by name. - type: string - required: - - registry - - volume - type: object - rbd: - description: |- - rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. - More info: https://examples.k8s.io/volumes/rbd/README.md - properties: - fsType: - description: |- - fsType is the filesystem type of the volume that you want to mount. 
- Tip: Ensure that the filesystem type is supported by the host operating system. - Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd - type: string - image: - description: |- - image is the rados image name. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - keyring: - default: /etc/ceph/keyring - description: |- - keyring is the path to key ring for RBDUser. - Default is /etc/ceph/keyring. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - monitors: - description: |- - monitors is a collection of Ceph monitors. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - items: - type: string - type: array - x-kubernetes-list-type: atomic - pool: - default: rbd - description: |- - pool is the rados pool name. - Default is rbd. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - readOnly: - description: |- - readOnly here will force the ReadOnly setting in VolumeMounts. - Defaults to false. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: boolean - secretRef: - description: |- - secretRef is name of the authentication secret for RBDUser. If provided - overrides keyring. - Default is nil. - More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - user: - default: admin - description: |- - user is the rados user name. - Default is admin. 
- More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it - type: string - required: - - image - - monitors - type: object - scaleIO: - description: scaleIO represents a ScaleIO persistent - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - default: xfs - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". - Default is "xfs". - type: string - gateway: - description: gateway is the host address of - the ScaleIO API Gateway. - type: string - protectionDomain: - description: protectionDomain is the name - of the ScaleIO Protection Domain for the - configured storage. - type: string - readOnly: - description: |- - readOnly Defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef references to the secret for ScaleIO user and other - sensitive information. If this is not provided, Login operation will fail. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - sslEnabled: - description: sslEnabled Flag enable/disable - SSL communication with Gateway, default - false - type: boolean - storageMode: - default: ThinProvisioned - description: |- - storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. - Default is ThinProvisioned. - type: string - storagePool: - description: storagePool is the ScaleIO Storage - Pool associated with the protection domain. 
- type: string - system: - description: system is the name of the storage - system as configured in ScaleIO. - type: string - volumeName: - description: |- - volumeName is the name of a volume already created in the ScaleIO system - that is associated with this volume source. - type: string - required: - - gateway - - secretRef - - system - type: object - secret: - description: |- - secret represents a secret that should populate this volume. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - properties: - defaultMode: - description: |- - defaultMode is Optional: mode bits used to set permissions on created files by default. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values - for mode bits. Defaults to 0644. - Directories within the path are not affected by this setting. - format: int32 - type: integer - items: - description: |- - items If unspecified, each key-value pair in the Data field of the referenced - Secret will be projected into the volume as a file whose name is the - key and content is the value. If specified, the listed keys will be - projected into the specified paths, and unlisted keys will not be - present. - items: - description: Maps a string key to a path - within a volume. - properties: - key: - description: key is the key to project. - type: string - mode: - description: |- - mode is Optional: mode bits used to set permissions on this file. - Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. - YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. - If not specified, the volume defaultMode will be used. - format: int32 - type: integer - path: - description: |- - path is the relative path of the file to map the key to. - May not be an absolute path. - May not contain the path element '..'. - May not start with the string '..'. 
- type: string - required: - - key - - path - type: object - type: array - x-kubernetes-list-type: atomic - optional: - description: optional field specify whether - the Secret or its keys must be defined - type: boolean - secretName: - description: |- - secretName is the name of the secret in the pod's namespace to use. - More info: https://kubernetes.io/docs/concepts/storage/volumes#secret - type: string - type: object - storageos: - description: storageOS represents a StorageOS - volume attached and mounted on Kubernetes nodes. - properties: - fsType: - description: |- - fsType is the filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - readOnly: - description: |- - readOnly defaults to false (read/write). ReadOnly here will force - the ReadOnly setting in VolumeMounts. - type: boolean - secretRef: - description: |- - secretRef specifies the secret to use for obtaining the StorageOS API - credentials. If not specified, default values will be attempted. - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - volumeName: - description: |- - volumeName is the human-readable name of the StorageOS volume. Volume - names are only unique within a namespace. - type: string - volumeNamespace: - description: |- - volumeNamespace specifies the scope of the volume within StorageOS. If no - namespace is specified then the Pod's namespace will be used. This allows the - Kubernetes name scoping to be mirrored within StorageOS for tighter integration. 
- Set VolumeName to any name to override the default behaviour. - Set to "default" if you are not using namespaces within StorageOS. - type: string - type: object - vsphereVolume: - description: vsphereVolume represents a vSphere - volume attached and mounted on kubelets host - machine - properties: - fsType: - description: |- - fsType is filesystem type to mount. - Must be a filesystem type supported by the host operating system. - Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - type: string - storagePolicyID: - description: storagePolicyID is the storage - Policy Based Management (SPBM) profile ID - associated with the StoragePolicyName. - type: string - storagePolicyName: - description: storagePolicyName is the storage - Policy Based Management (SPBM) profile name. - type: string - volumePath: - description: volumePath is the path that identifies - vSphere volume vmdk - type: string - required: - - volumePath - type: object - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map - required: - - containers - type: object - type: object - type: object - type: object - required: - - xgbReplicaSpecs - type: object - status: - description: JobStatus represents the current observed state of the training - Job. - properties: - completionTime: - description: |- - Represents time when the job was completed. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - conditions: - description: Conditions is an array of current observed job conditions. - items: - description: JobCondition describes the state of the job at a certain - point. - properties: - lastTransitionTime: - description: Last time the condition transitioned from one status - to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. 
- format: date-time - type: string - message: - description: A human readable message indicating details about - the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Type of job condition. - type: string - required: - - status - - type - type: object - type: array - lastReconcileTime: - description: |- - Represents last time when the job was reconciled. It is not guaranteed to - be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - replicaStatuses: - additionalProperties: - description: ReplicaStatus represents the current observed state - of the replica. - properties: - active: - description: The number of actively running pods. - format: int32 - type: integer - failed: - description: The number of pods which reached phase Failed. - format: int32 - type: integer - labelSelector: - description: 'Deprecated: Use Selector instead' - properties: - matchExpressions: - description: matchExpressions is a list of label selector - requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector - applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - selector: - description: |- - A Selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty Selector matches all objects. A null - Selector matches no objects. - type: string - succeeded: - description: The number of pods which reached phase Succeeded. - format: int32 - type: integer - type: object - description: |- - ReplicaStatuses is map of ReplicaType and ReplicaStatus, - specifies the status of each replica. - type: object - startTime: - description: |- - Represents time when the job was acknowledged by the job controller. - It is not guaranteed to be set in happens-before order across separate operations. - It is represented in RFC3339 form and is in UTC. 
- format: date-time - type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} diff --git a/manifests/base/crds/kustomization.yaml b/manifests/base/crds/kustomization.yaml deleted file mode 100644 index 16d824bc23..0000000000 --- a/manifests/base/crds/kustomization.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - kubeflow.org_tfjobs.yaml - - kubeflow.org_pytorchjobs.yaml - - kubeflow.org_xgboostjobs.yaml - - kubeflow.org_mpijobs.yaml - - kubeflow.org_paddlejobs.yaml - - kubeflow.org_jaxjobs.yaml diff --git a/manifests/base/deployment.yaml b/manifests/base/deployment.yaml deleted file mode 100644 index b38295a4a0..0000000000 --- a/manifests/base/deployment.yaml +++ /dev/null @@ -1,64 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: training-operator - labels: - control-plane: kubeflow-training-operator -spec: - selector: - matchLabels: - control-plane: kubeflow-training-operator - replicas: 1 - template: - metadata: - labels: - control-plane: kubeflow-training-operator - annotations: - sidecar.istio.io/inject: "false" - spec: - containers: - - command: - - /manager - image: kubeflow/training-operator - name: training-operator - ports: - - containerPort: 8080 - - containerPort: 9443 - name: webhook-server - protocol: TCP - env: - - name: MY_POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: MY_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - securityContext: - allowPrivilegeEscalation: false - volumeMounts: - - mountPath: /tmp/k8s-webhook-server/serving-certs - name: cert - readOnly: true - livenessProbe: - httpGet: - path: /healthz - port: 8081 - initialDelaySeconds: 15 - periodSeconds: 20 - timeoutSeconds: 3 - readinessProbe: - httpGet: - path: /readyz - port: 8081 - initialDelaySeconds: 10 - periodSeconds: 15 - timeoutSeconds: 3 - serviceAccountName: training-operator - 
terminationGracePeriodSeconds: 10 - volumes: - - name: cert - secret: - defaultMode: 420 - secretName: training-operator-webhook-cert diff --git a/manifests/base/kustomization.yaml b/manifests/base/kustomization.yaml deleted file mode 100644 index b140be1441..0000000000 --- a/manifests/base/kustomization.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - ./crds - - ./rbac/cluster-role-binding.yaml - - ./rbac/role.yaml - - ./rbac/service-account.yaml - - ./webhook - - service.yaml - - deployment.yaml diff --git a/manifests/base/rbac/cluster-role-binding.yaml b/manifests/base/rbac/cluster-role-binding.yaml deleted file mode 100644 index 97b47bad69..0000000000 --- a/manifests/base/rbac/cluster-role-binding.yaml +++ /dev/null @@ -1,14 +0,0 @@ ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - labels: - app: training-operator - name: training-operator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: training-operator -subjects: -- kind: ServiceAccount - name: training-operator diff --git a/manifests/base/rbac/role.yaml b/manifests/base/rbac/role.yaml deleted file mode 100644 index ae91d43ba8..0000000000 --- a/manifests/base/rbac/role.yaml +++ /dev/null @@ -1,292 +0,0 @@ ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: training-operator -rules: -- apiGroups: - - "" - resources: - - configmaps - verbs: - - create - - list - - update - - watch -- apiGroups: - - "" - resources: - - events - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - pods - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - pods/exec - verbs: - - create -- apiGroups: - - "" - resources: - - secrets - verbs: - - get - - list - - update - - watch -- apiGroups: - - "" - resources: - - serviceaccounts - verbs: - - 
create - - get - - list - - watch -- apiGroups: - - "" - resources: - - services - verbs: - - create - - delete - - get - - list - - watch -- apiGroups: - - admissionregistration.k8s.io - resources: - - validatingwebhookconfigurations - verbs: - - get - - list - - update - - watch -- apiGroups: - - autoscaling - resources: - - horizontalpodautoscalers - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kubeflow.org - resources: - - jaxjobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kubeflow.org - resources: - - jaxjobs/finalizers - verbs: - - update -- apiGroups: - - kubeflow.org - resources: - - jaxjobs/status - verbs: - - get - - patch - - update -- apiGroups: - - kubeflow.org - resources: - - mpijobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kubeflow.org - resources: - - mpijobs/finalizers - verbs: - - update -- apiGroups: - - kubeflow.org - resources: - - mpijobs/status - verbs: - - get - - patch - - update -- apiGroups: - - kubeflow.org - resources: - - paddlejobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kubeflow.org - resources: - - paddlejobs/finalizers - verbs: - - update -- apiGroups: - - kubeflow.org - resources: - - paddlejobs/status - verbs: - - get - - patch - - update -- apiGroups: - - kubeflow.org - resources: - - pytorchjobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kubeflow.org - resources: - - pytorchjobs/finalizers - verbs: - - update -- apiGroups: - - kubeflow.org - resources: - - pytorchjobs/status - verbs: - - get - - patch - - update -- apiGroups: - - kubeflow.org - resources: - - tfjobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kubeflow.org - resources: - - tfjobs/finalizers - verbs: - - update -- apiGroups: - - kubeflow.org - 
resources: - - tfjobs/status - verbs: - - get - - patch - - update -- apiGroups: - - kubeflow.org - resources: - - xgboostjobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kubeflow.org - resources: - - xgboostjobs/finalizers - verbs: - - update -- apiGroups: - - kubeflow.org - resources: - - xgboostjobs/status - verbs: - - get - - patch - - update -- apiGroups: - - rbac.authorization.k8s.io - resources: - - rolebindings - verbs: - - create - - list - - update - - watch -- apiGroups: - - rbac.authorization.k8s.io - resources: - - roles - verbs: - - create - - list - - update - - watch -- apiGroups: - - scheduling.volcano.sh - resources: - - podgroups - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - scheduling.x-k8s.io - resources: - - podgroups - verbs: - - create - - delete - - get - - list - - patch - - update - - watch diff --git a/manifests/base/rbac/service-account.yaml b/manifests/base/rbac/service-account.yaml deleted file mode 100644 index e6f10afc28..0000000000 --- a/manifests/base/rbac/service-account.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - labels: - app: training-operator - name: training-operator diff --git a/manifests/base/service.yaml b/manifests/base/service.yaml deleted file mode 100644 index 4f2300aedf..0000000000 --- a/manifests/base/service.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - prometheus.io/path: /metrics - prometheus.io/scrape: "true" - prometheus.io/port: "8080" - labels: - app: training-operator - name: training-operator -spec: - ports: - - name: monitoring-port - port: 8080 - targetPort: 8080 - - name: webhook-server - port: 443 - protocol: TCP - targetPort: 9443 - selector: - control-plane: kubeflow-training-operator - type: ClusterIP diff --git a/manifests/base/webhook/kustomization.yaml b/manifests/base/webhook/kustomization.yaml deleted 
file mode 100644 index bc84486cb9..0000000000 --- a/manifests/base/webhook/kustomization.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: -- manifests.yaml -patches: -- path: patch.yaml - target: - group: admissionregistration.k8s.io - kind: ValidatingWebhookConfiguration - version: v1 - -configurations: -- kustomizeconfig.yaml -labels: -- includeSelectors: true - pairs: - control-plane: kubeflow-training-operator diff --git a/manifests/base/webhook/kustomizeconfig.yaml b/manifests/base/webhook/kustomizeconfig.yaml deleted file mode 100644 index 8b55ef316b..0000000000 --- a/manifests/base/webhook/kustomizeconfig.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# the following config is for teaching kustomize where to look at when substituting vars. -# It requires kustomize v2.1.0 or newer to work properly. -namespace: - - kind: ValidatingWebhookConfiguration - group: admissionregistration.k8s.io - path: webhooks/clientConfig/service/namespace - create: true - -varReference: - - path: metadata/annotations diff --git a/manifests/base/webhook/manifests.yaml b/manifests/base/webhook/manifests.yaml deleted file mode 100644 index 2c381d0cd1..0000000000 --- a/manifests/base/webhook/manifests.yaml +++ /dev/null @@ -1,106 +0,0 @@ ---- -apiVersion: admissionregistration.k8s.io/v1 -kind: ValidatingWebhookConfiguration -metadata: - name: validating-webhook-configuration -webhooks: -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: webhook-service - namespace: system - path: /validate-kubeflow-org-v1-jaxjob - failurePolicy: Fail - name: validator.jaxjob.training-operator.kubeflow.org - rules: - - apiGroups: - - kubeflow.org - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - resources: - - jaxjobs - sideEffects: None -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: webhook-service - namespace: system - path: /validate-kubeflow-org-v1-paddlejob - failurePolicy: Fail - name: 
validator.paddlejob.training-operator.kubeflow.org - rules: - - apiGroups: - - kubeflow.org - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - resources: - - paddlejobs - sideEffects: None -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: webhook-service - namespace: system - path: /validate-kubeflow-org-v1-pytorchjob - failurePolicy: Fail - name: validator.pytorchjob.training-operator.kubeflow.org - rules: - - apiGroups: - - kubeflow.org - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - resources: - - pytorchjobs - sideEffects: None -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: webhook-service - namespace: system - path: /validate-kubeflow-org-v1-tfjob - failurePolicy: Fail - name: validator.tfjob.training-operator.kubeflow.org - rules: - - apiGroups: - - kubeflow.org - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - resources: - - tfjobs - sideEffects: None -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: webhook-service - namespace: system - path: /validate-kubeflow-org-v1-xgboostjob - failurePolicy: Fail - name: validator.xgboostjob.training-operator.kubeflow.org - rules: - - apiGroups: - - kubeflow.org - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - resources: - - xgboostjobs - sideEffects: None diff --git a/manifests/base/webhook/patch.yaml b/manifests/base/webhook/patch.yaml deleted file mode 100644 index b103423df2..0000000000 --- a/manifests/base/webhook/patch.yaml +++ /dev/null @@ -1,18 +0,0 @@ -- op: replace - path: /webhooks/0/clientConfig/service/name - value: training-operator -- op: replace - path: /webhooks/1/clientConfig/service/name - value: training-operator -- op: replace - path: /webhooks/2/clientConfig/service/name - value: training-operator -- op: replace - path: /webhooks/3/clientConfig/service/name - value: training-operator -- op: replace - path: /webhooks/4/clientConfig/service/name - value: training-operator -- op: replace - path: 
/metadata/name - value: validator.training-operator.kubeflow.org diff --git a/manifests/overlays/kubeflow/kubeflow-training-roles.yaml b/manifests/overlays/kubeflow/kubeflow-training-roles.yaml deleted file mode 100644 index 695db48313..0000000000 --- a/manifests/overlays/kubeflow/kubeflow-training-roles.yaml +++ /dev/null @@ -1,96 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: kubeflow-training-admin - labels: - rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" -aggregationRule: - clusterRoleSelectors: - - matchLabels: - rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true" -rules: [] - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: kubeflow-training-edit - labels: - rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" - rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true" -rules: - - apiGroups: - - kubeflow.org - resources: - - mpijobs - - tfjobs - - pytorchjobs - - xgboostjobs - - paddlejobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - - apiGroups: - - kubeflow.org - resources: - - mpijobs/status - - tfjobs/status - - pytorchjobs/status - - xgboostjobs/status - - paddlejobs/status - verbs: - - get - - apiGroups: - - "" - resources: - - persistentvolumeclaims - verbs: - - create - - delete - - get - - list - - watch - - apiGroups: - - "" - resources: - - events - verbs: - - get - - list - - watch - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: kubeflow-training-view - labels: - rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" -rules: - - apiGroups: - - kubeflow.org - resources: - - mpijobs - - tfjobs - - pytorchjobs - - xgboostjobs - - paddlejobs - verbs: - - get - - list - - watch - - apiGroups: - - kubeflow.org - resources: - - mpijobs/status - - tfjobs/status - - pytorchjobs/status - - xgboostjobs/status - - 
paddlejobs/status - verbs: - - get diff --git a/manifests/overlays/kubeflow/kustomization.yaml b/manifests/overlays/kubeflow/kustomization.yaml deleted file mode 100644 index 206d900a88..0000000000 --- a/manifests/overlays/kubeflow/kustomization.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: kubeflow -resources: - - ../../base - - kubeflow-training-roles.yaml -images: - - name: kubeflow/training-operator - newTag: "latest" -# TODO (tenzen-y): Once we support cert-manager, we need to remove this secret generation. -# REF: https://github.com/kubeflow/training-operator/issues/2049 -secretGenerator: - - name: training-operator-webhook-cert - options: - disableNameSuffixHash: true diff --git a/manifests/overlays/standalone/kustomization.yaml b/manifests/overlays/standalone/kustomization.yaml deleted file mode 100644 index df72e1dc03..0000000000 --- a/manifests/overlays/standalone/kustomization.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: kubeflow -resources: - - ../../base - - namespace.yaml -images: - - name: kubeflow/training-operator - newTag: "latest" -secretGenerator: - - name: training-operator-webhook-cert - options: - disableNameSuffixHash: true diff --git a/manifests/overlays/standalone/namespace.yaml b/manifests/overlays/standalone/namespace.yaml deleted file mode 100644 index 7a940e4673..0000000000 --- a/manifests/overlays/standalone/namespace.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: kubeflow diff --git a/manifests/v2/third-party/jobset/manifests.yaml b/manifests/v2/third-party/jobset/manifests.yaml new file mode 100644 index 0000000000..df7f413391 --- /dev/null +++ b/manifests/v2/third-party/jobset/manifests.yaml @@ -0,0 +1,130 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + app.kubernetes.io/component: manager + app.kubernetes.io/created-by: jobset + 
app.kubernetes.io/instance: system + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: namespace + app.kubernetes.io/part-of: jobset + control-plane: controller-manager + name: jobset-system +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: webhook + app.kubernetes.io/created-by: jobset + app.kubernetes.io/instance: webhook-service + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: service + app.kubernetes.io/part-of: jobset + name: jobset-webhook-service + namespace: jobset-system +spec: + ports: + - port: 443 + protocol: TCP + targetPort: 9443 + selector: + control-plane: controller-manager +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: jobset-mutating-webhook-configuration +webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: jobset-webhook-service + namespace: jobset-system + path: /mutate-jobset-x-k8s-io-v1alpha2-jobset + failurePolicy: Fail + name: mjobset.kb.io + rules: + - apiGroups: + - jobset.x-k8s.io + apiVersions: + - v1alpha2 + operations: + - CREATE + - UPDATE + resources: + - jobsets + sideEffects: None + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: jobset-webhook-service + namespace: jobset-system + path: /mutate--v1-pod + failurePolicy: Fail + name: mpod.kb.io + objectSelector: + matchExpressions: + - key: jobset.sigs.k8s.io/jobset-name + operator: Exists + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + sideEffects: None +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: jobset-validating-webhook-configuration +webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: jobset-webhook-service + namespace: jobset-system + path: /validate-jobset-x-k8s-io-v1alpha2-jobset + failurePolicy: Fail + name: vjobset.kb.io + rules: + - apiGroups: + - 
jobset.x-k8s.io + apiVersions: + - v1alpha2 + operations: + - CREATE + - UPDATE + resources: + - jobsets + sideEffects: None + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: jobset-webhook-service + namespace: jobset-system + path: /validate--v1-pod + failurePolicy: Fail + name: vpod.kb.io + objectSelector: + matchExpressions: + - key: jobset.sigs.k8s.io/jobset-name + operator: Exists + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + sideEffects: None diff --git a/pkg/apis/kubeflow.org/v1/common_types.go b/pkg/apis/kubeflow.org/v1/common_types.go deleted file mode 100644 index 59923b4da7..0000000000 --- a/pkg/apis/kubeflow.org/v1/common_types.go +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -const ( - // ReplicaIndexLabel represents the label key for the replica-index, e.g. 0, 1, 2.. etc - ReplicaIndexLabel = "training.kubeflow.org/replica-index" - - // ReplicaTypeLabel represents the label key for the replica-type, e.g. ps, worker etc. - ReplicaTypeLabel = "training.kubeflow.org/replica-type" - - // OperatorNameLabel represents the label key for the operator name, e.g. tf-operator, mpi-operator, etc. 
- OperatorNameLabel = "training.kubeflow.org/operator-name" - - // JobNameLabel represents the label key for the job name, the value is the job name. - JobNameLabel = "training.kubeflow.org/job-name" - - // JobRoleLabel represents the label key for the job role, e.g. master. - JobRoleLabel = "training.kubeflow.org/job-role" - - // KubeflowJobsController represents the value of the default jobs controller - KubeflowJobsController = "kubeflow.org/training-operator" - - // MultiKueueController represents the MultiKueue controller - MultiKueueController = "kueue.x-k8s.io/multikueue" -) - -// JobStatus represents the current observed state of the training Job. -type JobStatus struct { - // Conditions is an array of current observed job conditions. - Conditions []JobCondition `json:"conditions,omitempty"` - - // ReplicaStatuses is map of ReplicaType and ReplicaStatus, - // specifies the status of each replica. - ReplicaStatuses map[ReplicaType]*ReplicaStatus `json:"replicaStatuses,omitempty"` - - // Represents time when the job was acknowledged by the job controller. - // It is not guaranteed to be set in happens-before order across separate operations. - // It is represented in RFC3339 form and is in UTC. - StartTime *metav1.Time `json:"startTime,omitempty"` - - // Represents time when the job was completed. It is not guaranteed to - // be set in happens-before order across separate operations. - // It is represented in RFC3339 form and is in UTC. - CompletionTime *metav1.Time `json:"completionTime,omitempty"` - - // Represents last time when the job was reconciled. It is not guaranteed to - // be set in happens-before order across separate operations. - // It is represented in RFC3339 form and is in UTC. - LastReconcileTime *metav1.Time `json:"lastReconcileTime,omitempty"` -} - -// ReplicaType represents the type of the replica. Each operator needs to define its -// own set of ReplicaTypes. 
-type ReplicaType string - -// ReplicaStatus represents the current observed state of the replica. -type ReplicaStatus struct { - // The number of actively running pods. - Active int32 `json:"active,omitempty"` - - // The number of pods which reached phase Succeeded. - Succeeded int32 `json:"succeeded,omitempty"` - - // The number of pods which reached phase Failed. - Failed int32 `json:"failed,omitempty"` - - // Deprecated: Use Selector instead - LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"` - - // A Selector is a label query over a set of resources. The result of matchLabels and - // matchExpressions are ANDed. An empty Selector matches all objects. A null - // Selector matches no objects. - Selector string `json:"selector,omitempty"` -} - -// ReplicaSpec is a description of the replica -type ReplicaSpec struct { - // Replicas is the desired number of replicas of the given template. - // If unspecified, defaults to 1. - Replicas *int32 `json:"replicas,omitempty"` - - // Template is the object that describes the pod that - // will be created for this replica. RestartPolicy in PodTemplateSpec - // will be overide by RestartPolicy in ReplicaSpec - Template v1.PodTemplateSpec `json:"template,omitempty"` - - // Restart policy for all replicas within the job. - // One of Always, OnFailure, Never and ExitCode. - // Default to Never. - RestartPolicy RestartPolicy `json:"restartPolicy,omitempty"` -} - -// JobCondition describes the state of the job at a certain point. -type JobCondition struct { - // Type of job condition. - Type JobConditionType `json:"type"` - // Status of the condition, one of True, False, Unknown. - Status v1.ConditionStatus `json:"status"` - // The reason for the condition's last transition. - Reason string `json:"reason,omitempty"` - // A human readable message indicating details about the transition. - Message string `json:"message,omitempty"` - // The last time this condition was updated. 
- LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"` - // Last time the condition transitioned from one status to another. - LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"` -} - -// JobConditionType defines all kinds of types of JobStatus. -type JobConditionType string - -const ( - // JobCreated means the job has been accepted by the system, - // but one or more of the pods/services has not been started. - // This includes time before pods being scheduled and launched. - JobCreated JobConditionType = "Created" - - // JobRunning means all sub-resources (e.g. services/pods) of this job - // have been successfully scheduled and launched. - // The training is running without error. - JobRunning JobConditionType = "Running" - - // JobRestarting means one or more sub-resources (e.g. services/pods) of this job - // reached phase failed but maybe restarted according to it's restart policy - // which specified by user in v1.PodTemplateSpec. - // The training is freezing/pending. - JobRestarting JobConditionType = "Restarting" - - // JobSucceeded means all sub-resources (e.g. services/pods) of this job - // reached phase have terminated in success. - // The training is complete without error. - JobSucceeded JobConditionType = "Succeeded" - - // JobSuspended means the job has been suspended. - JobSuspended JobConditionType = "Suspended" - - // JobFailed means one or more sub-resources (e.g. services/pods) of this job - // reached phase failed with no restarting. - // The training has failed its execution. - JobFailed JobConditionType = "Failed" -) - -// CleanPodPolicy describes how to deal with pods when the job is finished. -type CleanPodPolicy string - -const ( - CleanPodPolicyUndefined CleanPodPolicy = "" - CleanPodPolicyAll CleanPodPolicy = "All" - CleanPodPolicyRunning CleanPodPolicy = "Running" - CleanPodPolicyNone CleanPodPolicy = "None" -) - -// RestartPolicy describes how the replicas should be restarted. 
-// Only one of the following restart policies may be specified. -// If none of the following policies is specified, the default one -// is RestartPolicyAlways. -type RestartPolicy string - -const ( - RestartPolicyAlways RestartPolicy = "Always" - RestartPolicyOnFailure RestartPolicy = "OnFailure" - RestartPolicyNever RestartPolicy = "Never" - - // RestartPolicyExitCode policy means that user should add exit code by themselves, - // The job operator will check these exit codes to - // determine the behavior when an error occurs: - // - 1-127: permanent error, do not restart. - // - 128-255: retryable error, will restart the pod. - RestartPolicyExitCode RestartPolicy = "ExitCode" -) - -// RunPolicy encapsulates various runtime policies of the distributed training -// job, for example how to clean up resources and how long the job can stay -// active. -type RunPolicy struct { - // CleanPodPolicy defines the policy to kill pods after the job completes. - // Default to None. - CleanPodPolicy *CleanPodPolicy `json:"cleanPodPolicy,omitempty"` - - // TTLSecondsAfterFinished is the TTL to clean up jobs. - // It may take extra ReconcilePeriod seconds for the cleanup, since - // reconcile gets called periodically. - // Default to infinite. - TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"` - - // Specifies the duration in seconds relative to the startTime that the job may be active - // before the system tries to terminate it; value must be positive integer. - // +optional - ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"` - - // Optional number of retries before marking this job failed. - // +optional - BackoffLimit *int32 `json:"backoffLimit,omitempty"` - - // SchedulingPolicy defines the policy related to scheduling, e.g. gang-scheduling - // +optional - SchedulingPolicy *SchedulingPolicy `json:"schedulingPolicy,omitempty"` - - // suspend specifies whether the Job controller should create Pods or not. 
- // If a Job is created with suspend set to true, no Pods are created by - // the Job controller. If a Job is suspended after creation (i.e. the - // flag goes from false to true), the Job controller will delete all - // active Pods and PodGroups associated with this Job. - // Users must design their workload to gracefully handle this. - // Suspending a Job will reset the StartTime field of the Job. - // - // Defaults to false. - // +kubebuilder:default:=false - // +optional - Suspend *bool `json:"suspend,omitempty"` - - // ManagedBy is used to indicate the controller or entity that manages a job. - // The value must be either an empty, 'kubeflow.org/training-operator' or - // 'kueue.x-k8s.io/multikueue'. - // The training-operator reconciles a job which doesn't have this - // field at all or the field value is the reserved string - // 'kubeflow.org/training-operator', but delegates reconciling the job - // with 'kueue.x-k8s.io/multikueue' to the Kueue. - // The field is immutable. - ManagedBy *string `json:"managedBy,omitempty"` -} - -// SchedulingPolicy encapsulates various scheduling policies of the distributed training -// job, for example `minAvailable` for gang-scheduling. 
-type SchedulingPolicy struct { - MinAvailable *int32 `json:"minAvailable,omitempty"` - // +kubebuilder:validation:XValidation:rule="self == oldSelf", message="spec.runPolicy.schedulingPolicy.queue is immutable" - Queue string `json:"queue,omitempty"` - MinResources *map[v1.ResourceName]resource.Quantity `json:"minResources,omitempty"` - PriorityClass string `json:"priorityClass,omitempty"` - ScheduleTimeoutSeconds *int32 `json:"scheduleTimeoutSeconds,omitempty"` -} diff --git a/pkg/apis/kubeflow.org/v1/defaulting_utils.go b/pkg/apis/kubeflow.org/v1/defaulting_utils.go deleted file mode 100644 index 5b3a670854..0000000000 --- a/pkg/apis/kubeflow.org/v1/defaulting_utils.go +++ /dev/null @@ -1,63 +0,0 @@ -package v1 - -import ( - "strings" - - corev1 "k8s.io/api/core/v1" - "k8s.io/utils/ptr" -) - -func getDefaultContainerIndex(spec *corev1.PodSpec, defaultContainerName string) int { - for i, container := range spec.Containers { - if container.Name == defaultContainerName { - return i - } - } - return 0 -} - -func hasDefaultPort(spec *corev1.PodSpec, containerIndex int, defaultPortName string) bool { - for _, port := range spec.Containers[containerIndex].Ports { - if port.Name == defaultPortName { - return true - } - } - return false -} - -func setDefaultPort(spec *corev1.PodSpec, defaultPortName string, defaultPort int32, defaultContainerIndex int) { - spec.Containers[defaultContainerIndex].Ports = append(spec.Containers[defaultContainerIndex].Ports, - corev1.ContainerPort{ - Name: defaultPortName, - ContainerPort: defaultPort, - }) -} - -func setDefaultRestartPolicy(replicaSpec *ReplicaSpec, defaultRestartPolicy RestartPolicy) { - if replicaSpec != nil && replicaSpec.RestartPolicy == "" { - replicaSpec.RestartPolicy = defaultRestartPolicy - } -} - -func setDefaultReplicas(replicaSpec *ReplicaSpec, replicas int32) { - if replicaSpec != nil && replicaSpec.Replicas == nil { - replicaSpec.Replicas = ptr.To[int32](replicas) - } -} - -// setTypeNameToCamelCase sets the 
name of the replica type from any case to correct case. -// E.g. from server to Server; from WORKER to Worker. -func setTypeNameToCamelCase(replicaSpecs map[ReplicaType]*ReplicaSpec, typ ReplicaType) { - for t := range replicaSpecs { - if strings.EqualFold(string(t), string(typ)) && t != typ { - spec := replicaSpecs[t] - delete(replicaSpecs, t) - replicaSpecs[typ] = spec - return - } - } -} - -func CleanPodPolicyPointer(cleanPodPolicy CleanPodPolicy) *CleanPodPolicy { - return &cleanPodPolicy -} diff --git a/pkg/apis/kubeflow.org/v1/doc.go b/pkg/apis/kubeflow.org/v1/doc.go deleted file mode 100644 index 0f1d56499d..0000000000 --- a/pkg/apis/kubeflow.org/v1/doc.go +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +k8s:defaulter-gen=TypeMeta -// +k8s:openapi-gen=true -// +k8s:deepcopy-gen=package - -// Package v1 is the v1 version of the API. -// +groupName=kubeflow.org - -package v1 diff --git a/pkg/apis/kubeflow.org/v1/groupversion_info.go b/pkg/apis/kubeflow.org/v1/groupversion_info.go deleted file mode 100644 index 8d3e2aaae0..0000000000 --- a/pkg/apis/kubeflow.org/v1/groupversion_info.go +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright 2021. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Package v1 contains API Schema definitions for the kubeflow.org v1 API group -// +kubebuilder:object:generate=true -// +groupName=kubeflow.org -package v1 - -import ( - "k8s.io/apimachinery/pkg/runtime/schema" - "sigs.k8s.io/controller-runtime/pkg/scheme" -) - -var ( - // GroupVersion is group version used to register these objects - GroupVersion = schema.GroupVersion{Group: "kubeflow.org", Version: "v1"} - - MPIJobSchemeGroupVersionKind = schema.GroupVersionKind{Group: "kubeflow.org", Version: "v1", Kind: MPIJobKind} - - // SchemeBuilder is used to add go types to the GroupVersionKind scheme - SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} - - // AddToScheme adds the types in this group-version to the given scheme. - AddToScheme = SchemeBuilder.AddToScheme - - // SchemeGroupVersion is group version used to register these objects. - SchemeGroupVersion = GroupVersion -) - -// Resource takes an unqualified resource and returns a Group-qualified GroupResource. -func Resource(resource string) schema.GroupResource { - return GroupVersion.WithResource(resource).GroupResource() -} diff --git a/pkg/apis/kubeflow.org/v1/jax_defaults.go b/pkg/apis/kubeflow.org/v1/jax_defaults.go deleted file mode 100644 index f1c865d755..0000000000 --- a/pkg/apis/kubeflow.org/v1/jax_defaults.go +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -func addJAXDefaultingFuncs(scheme *runtime.Scheme) error { - return RegisterDefaults(scheme) -} - -// setJAXDefaultPort sets the default ports for jax container. -func setJAXDefaultPort(spec *corev1.PodSpec) { - index := getDefaultContainerIndex(spec, JAXJobDefaultContainerName) - if ok := hasDefaultPort(spec, index, JAXJobDefaultPortName); !ok { - setDefaultPort(spec, JAXJobDefaultPortName, JAXJobDefaultPort, index) - } -} - -// setJAXTypeNamesToCamelCase sets the name of all replica types from any case to correct case. -func setJAXTypeNamesToCamelCase(jaxJob *JAXJob) { - replicaTypes := []ReplicaType{ - JAXJobReplicaTypeWorker, - } - for _, replicaType := range replicaTypes { - setTypeNameToCamelCase(jaxJob.Spec.JAXReplicaSpecs, replicaType) - } -} - -// SetDefaults_JAXJob sets any unspecified values to defaults. -func SetDefaults_JAXJob(job *JAXJob) { - // Set default cleanpod policy to None. - if job.Spec.RunPolicy.CleanPodPolicy == nil { - job.Spec.RunPolicy.CleanPodPolicy = CleanPodPolicyPointer(CleanPodPolicyNone) - } - - // Update the key of JAXReplicaSpecs to camel case. - setJAXTypeNamesToCamelCase(job) - - for _, spec := range job.Spec.JAXReplicaSpecs { - // Set default replicas to 1. - setDefaultReplicas(spec, 1) - // Set default restartPolicy - setDefaultRestartPolicy(spec, JAXJobDefaultRestartPolicy) - // Set default port to jax container. 
- setJAXDefaultPort(&spec.Template.Spec) - } -} diff --git a/pkg/apis/kubeflow.org/v1/jax_types.go b/pkg/apis/kubeflow.org/v1/jax_types.go deleted file mode 100644 index bb539a6424..0000000000 --- a/pkg/apis/kubeflow.org/v1/jax_types.go +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -const ( - // JAXJobDefaultPortName is name of the port used to communicate between Coordinator and Workers. - JAXJobDefaultPortName = "jaxjob-port" - // JAXJobDefaultContainerName is the name of the JAXJob container. - JAXJobDefaultContainerName = "jax" - // JAXJobDefaultPort is default value of the port. - JAXJobDefaultPort = 6666 - // JAXJobDefaultRestartPolicy is default RestartPolicy for JAXReplicaSpecs. - JAXJobDefaultRestartPolicy = RestartPolicyNever - // JAXJobKind is the kind name. - JAXJobKind = "JAXJob" - // JAXJobPlural is the JAXJobPlural for JAXJob. - JAXJobPlural = "jaxjobs" - // JAXJobSingular is the singular for JAXJob. - JAXJobSingular = "jaxjob" - // JAXJobFrameworkName is the name of the ML Framework - JAXJobFrameworkName = "jax" - // JAXJobReplicaTypeWorker is the type for workers of distributed JAX. 
- JAXJobReplicaTypeWorker ReplicaType = "Worker" -) - -// +genclient -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=jaxjob -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status -//+kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.conditions[-1:].type` -//+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` -// +kubebuilder:subresource:scale:specpath=.spec.jaxReplicaSpecs.Worker.replicas,statuspath=.status.replicaStatuses.Worker.active,selectorpath=.status.replicaStatuses.Worker.selector - -// JAXJob Represents a JAXJob resource. -type JAXJob struct { - // Standard Kubernetes type metadata. - metav1.TypeMeta `json:",inline"` - - metav1.ObjectMeta `json:"metadata,omitempty"` - - // Specification of the desired state of the JAXJob. - Spec JAXJobSpec `json:"spec,omitempty"` - - // Most recently observed status of the JAXJob. - // Read-only (modified by the system). - Status JobStatus `json:"status,omitempty"` -} - -// JAXJobSpec is a desired state description of the JAXJob. -type JAXJobSpec struct { - // RunPolicy encapsulates various runtime policies of the distributed training - // job, for example how to clean up resources and how long the job can stay - // active. - //+kubebuilder:validation:Optional - RunPolicy RunPolicy `json:"runPolicy"` - - // A map of JAXReplicaType (type) to ReplicaSpec (value). Specifies the JAX cluster configuration. - // For example, - // { - // "Worker": JAXReplicaSpec, - // } - JAXReplicaSpecs map[ReplicaType]*ReplicaSpec `json:"jaxReplicaSpecs"` -} - -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=jaxjobs -//+kubebuilder:object:root=true - -// JAXJobList is a list of JAXJobs. -type JAXJobList struct { - // Standard type metadata. - metav1.TypeMeta `json:",inline"` - - // Standard list metadata. - metav1.ListMeta `json:"metadata,omitempty"` - - // List of JAXJobs. 
- Items []JAXJob `json:"items"` -} - -func init() { - SchemeBuilder.Register(&JAXJob{}, &JAXJobList{}) - SchemeBuilder.SchemeBuilder.Register(addJAXDefaultingFuncs) -} diff --git a/pkg/apis/kubeflow.org/v1/mpi_defaults.go b/pkg/apis/kubeflow.org/v1/mpi_defaults.go deleted file mode 100644 index 6a8f5e676b..0000000000 --- a/pkg/apis/kubeflow.org/v1/mpi_defaults.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2019 The Kubeflow Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - "k8s.io/apimachinery/pkg/runtime" -) - -func addMPIJobDefaultingFuncs(scheme *runtime.Scheme) error { - return RegisterDefaults(scheme) -} - -func SetDefaults_MPIJob(mpiJob *MPIJob) { - // Set default CleanPodPolicy to None when neither fields specified. 
- if mpiJob.Spec.CleanPodPolicy == nil && mpiJob.Spec.RunPolicy.CleanPodPolicy == nil { - mpiJob.Spec.CleanPodPolicy = CleanPodPolicyPointer(CleanPodPolicyNone) - mpiJob.Spec.RunPolicy.CleanPodPolicy = CleanPodPolicyPointer(CleanPodPolicyNone) - } - - // Set default replicas - setDefaultReplicas(mpiJob.Spec.MPIReplicaSpecs[MPIJobReplicaTypeLauncher], 1) - setDefaultReplicas(mpiJob.Spec.MPIReplicaSpecs[MPIJobReplicaTypeWorker], 0) - - // Set default restartPolicy - setDefaultRestartPolicy(mpiJob.Spec.MPIReplicaSpecs[MPIJobReplicaTypeLauncher], MPIJobDefaultRestartPolicy) - setDefaultRestartPolicy(mpiJob.Spec.MPIReplicaSpecs[MPIJobReplicaTypeWorker], MPIJobDefaultRestartPolicy) -} diff --git a/pkg/apis/kubeflow.org/v1/mpi_defaults_test.go b/pkg/apis/kubeflow.org/v1/mpi_defaults_test.go deleted file mode 100644 index 8be47db29b..0000000000 --- a/pkg/apis/kubeflow.org/v1/mpi_defaults_test.go +++ /dev/null @@ -1,171 +0,0 @@ -package v1 - -import ( - "reflect" - "testing" - - corev1 "k8s.io/api/core/v1" - "k8s.io/utils/ptr" -) - -func expectedMPIJob(cleanPodPolicy CleanPodPolicy, restartPolicy RestartPolicy) *MPIJob { - return &MPIJob{ - Spec: MPIJobSpec{ - CleanPodPolicy: &cleanPodPolicy, - RunPolicy: RunPolicy{ - CleanPodPolicy: &cleanPodPolicy, - }, - MPIReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - MPIJobReplicaTypeLauncher: { - Replicas: ptr.To[int32](1), - RestartPolicy: restartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: MPIJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - MPIJobReplicaTypeWorker: { - Replicas: ptr.To[int32](0), - RestartPolicy: restartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: MPIJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - }, - }, - } -} - -func TestSetDefaults_MPIJob(t *testing.T) { - customRestartPolicy := RestartPolicyAlways - - testCases := 
map[string]struct { - original *MPIJob - expected *MPIJob - }{ - "set default replicas": { - original: &MPIJob{ - Spec: MPIJobSpec{ - CleanPodPolicy: CleanPodPolicyPointer(CleanPodPolicyRunning), - RunPolicy: RunPolicy{ - CleanPodPolicy: CleanPodPolicyPointer(CleanPodPolicyRunning), - }, - MPIReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - MPIJobReplicaTypeLauncher: { - RestartPolicy: customRestartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: MPIJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - MPIJobReplicaTypeWorker: { - RestartPolicy: customRestartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: MPIJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedMPIJob(CleanPodPolicyRunning, customRestartPolicy), - }, - "set default clean pod policy": { - original: &MPIJob{ - Spec: MPIJobSpec{ - MPIReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - MPIJobReplicaTypeLauncher: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: MPIJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - MPIJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: MPIJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedMPIJob(CleanPodPolicyNone, MPIJobDefaultRestartPolicy), - }, - "set default restart policy": { - original: &MPIJob{ - Spec: MPIJobSpec{ - MPIReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - MPIJobReplicaTypeLauncher: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: MPIJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - MPIJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - 
Containers: []corev1.Container{ - { - Name: MPIJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedMPIJob(CleanPodPolicyNone, MPIJobDefaultRestartPolicy), - }, - } - for name, tc := range testCases { - SetDefaults_MPIJob(tc.original) - if !reflect.DeepEqual(tc.original, tc.expected) { - t.Errorf("%s: Want\n%v; Got\n %v", name, tc.expected, tc.original) - } - } -} diff --git a/pkg/apis/kubeflow.org/v1/mpi_types.go b/pkg/apis/kubeflow.org/v1/mpi_types.go deleted file mode 100644 index 5aa6222c23..0000000000 --- a/pkg/apis/kubeflow.org/v1/mpi_types.go +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2019 The Kubeflow Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -const ( - // MPIJobDefaultPortName is name of the port used to communicate between Master and Workers. - MPIJobDefaultPortName = "mpi-port" - // MPIJobDefaultPort is default value of the port. - MPIJobDefaultPort = 9999 - // MPIJobDefaultContainerName is the name of the MPIJob container. - MPIJobDefaultContainerName = "mpi" - // MPIJobDefaultRestartPolicy is default RestartPolicy for ReplicaSpec. - MPIJobDefaultRestartPolicy = RestartPolicyNever - MPIJobKind = "MPIJob" - // MPIJobPlural is the MPIJobPlural for TFJob. - MPIJobPlural = "mpijobs" - // MPIJobSingular is the singular for TFJob. 
- MPIJobSingular = "mpijob" - // MPIJobFrameworkName is the name of the ML Framework - MPIJobFrameworkName = "mpi" - // MPIJobReplicaTypeLauncher is the type for launcher replica. - MPIJobReplicaTypeLauncher ReplicaType = "Launcher" - // MPIJobReplicaTypeWorker is the type for worker replicas. - MPIJobReplicaTypeWorker ReplicaType = "Worker" -) - -// +genclient -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=mpijob -// +kubebuilder:object:root=true -// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date -// +kubebuilder:printcolumn:JSONPath=`.status.conditions[-1:].type`,name="State",type=string -// +kubebuilder:subresource:status - -type MPIJob struct { - metav1.TypeMeta `json:",inline"` - metav1.ObjectMeta `json:"metadata,omitempty"` - Spec MPIJobSpec `json:"spec,omitempty"` - Status JobStatus `json:"status,omitempty"` -} - -type MPIJobSpec struct { - - // Specifies the number of slots per worker used in hostfile. - // Defaults to 1. - // +optional - SlotsPerWorker *int32 `json:"slotsPerWorker,omitempty"` - - // CleanPodPolicy defines the policy that whether to kill pods after the job completes. - // Defaults to None. - CleanPodPolicy *CleanPodPolicy `json:"cleanPodPolicy,omitempty"` - - // `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that - // specify the MPI replicas to run. - MPIReplicaSpecs map[ReplicaType]*ReplicaSpec `json:"mpiReplicaSpecs"` - - // MainContainer specifies name of the main container which - // executes the MPI code. - MainContainer string `json:"mainContainer,omitempty"` - - // `RunPolicy` encapsulates various runtime policies of the distributed training - // job, for example how to clean up resources and how long the job can stay - // active. 
- RunPolicy RunPolicy `json:"runPolicy,omitempty"` -} - -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=mpijobs -// +kubebuilder:object:root=true - -type MPIJobList struct { - metav1.TypeMeta `json:",inline"` - metav1.ListMeta `json:"metadata,omitempty"` - Items []MPIJob `json:"items"` -} - -func init() { - SchemeBuilder.Register(&MPIJob{}, &MPIJobList{}) - SchemeBuilder.SchemeBuilder.Register(addMPIJobDefaultingFuncs) -} diff --git a/pkg/apis/kubeflow.org/v1/mpi_validation.go b/pkg/apis/kubeflow.org/v1/mpi_validation.go deleted file mode 100644 index 1447d36183..0000000000 --- a/pkg/apis/kubeflow.org/v1/mpi_validation.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - "fmt" -) - -func ValidateV1MpiJobSpec(c *MPIJobSpec) error { - if c.MPIReplicaSpecs == nil { - return fmt.Errorf("MPIReplicaSpecs is not valid") - } - launcherExists := false - for rType, value := range c.MPIReplicaSpecs { - if value == nil || len(value.Template.Spec.Containers) == 0 { - return fmt.Errorf("MPIReplicaSpecs is not valid: containers definition expected in %v", rType) - } - // Make sure the replica type is valid. 
- validReplicaTypes := []ReplicaType{MPIJobReplicaTypeLauncher, MPIJobReplicaTypeWorker} - - isValidReplicaType := false - for _, t := range validReplicaTypes { - if t == rType { - isValidReplicaType = true - break - } - } - if !isValidReplicaType { - return fmt.Errorf("MPIReplicaType is %v but must be one of %v", rType, validReplicaTypes) - } - - for _, container := range value.Template.Spec.Containers { - if container.Image == "" { - return fmt.Errorf("MPIReplicaSpec is not valid: Image is undefined in the container of %v", rType) - } - - if container.Name == "" { - return fmt.Errorf("MPIReplicaSpec is not valid: ImageName is undefined in the container of %v", rType) - } - } - if rType == MPIJobReplicaTypeLauncher { - launcherExists = true - if value.Replicas != nil && int(*value.Replicas) != 1 { - return fmt.Errorf("MPIReplicaSpec is not valid: There must be only 1 launcher replica") - } - } - - } - - if !launcherExists { - return fmt.Errorf("MPIReplicaSpec is not valid: Master ReplicaSpec must be present") - } - return nil - -} diff --git a/pkg/apis/kubeflow.org/v1/mpi_validation_test.go b/pkg/apis/kubeflow.org/v1/mpi_validation_test.go deleted file mode 100644 index e42e242cb6..0000000000 --- a/pkg/apis/kubeflow.org/v1/mpi_validation_test.go +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package v1 - -import ( - "testing" - - corev1 "k8s.io/api/core/v1" - "k8s.io/utils/ptr" -) - -func TestValidateV1MpiJobSpec(t *testing.T) { - testCases := []MPIJobSpec{ - { - MPIReplicaSpecs: nil, - }, - { - MPIReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - MPIJobReplicaTypeLauncher: &ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{}, - }, - }, - }, - }, - }, - { - MPIReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - MPIJobReplicaTypeLauncher: &ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - corev1.Container{ - Image: "", - }, - }, - }, - }, - }, - }, - }, - { - MPIReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - MPIJobReplicaTypeLauncher: &ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - corev1.Container{ - Name: "", - Image: "kubeflow/tf-dist-mnist-test:1.0", - }, - }, - }, - }, - }, - }, - }, - { - MPIReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - MPIJobReplicaTypeLauncher: &ReplicaSpec{ - Replicas: ptr.To[int32](2), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - corev1.Container{ - Name: "tensorflow", - Image: "kubeflow/tf-dist-mnist-test:1.0", - }, - }, - }, - }, - }, - }, - }, - } - for _, c := range testCases { - err := ValidateV1MpiJobSpec(&c) - if err == nil { - t.Error("Failed validate the kubeflowv1.MpiJobSpec") - } - } -} diff --git a/pkg/apis/kubeflow.org/v1/paddlepaddle_defaults.go b/pkg/apis/kubeflow.org/v1/paddlepaddle_defaults.go deleted file mode 100644 index e65b481bd6..0000000000 --- a/pkg/apis/kubeflow.org/v1/paddlepaddle_defaults.go +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2022 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -func addPaddleDefaultingFuncs(scheme *runtime.Scheme) error { - return RegisterDefaults(scheme) -} - -// setPaddleDefaultPort sets the default ports for paddle container. -func setPaddleDefaultPort(spec *corev1.PodSpec) { - index := getDefaultContainerIndex(spec, PaddleJobDefaultContainerName) - if ok := hasDefaultPort(spec, index, PaddleJobDefaultPortName); !ok { - setDefaultPort(spec, PaddleJobDefaultPortName, PaddleJobDefaultPort, index) - } -} - -func setPaddleElasticPolicy(paddleJob *PaddleJob) { - if paddleJob.Spec.ElasticPolicy != nil { - if paddleJob.Spec.ElasticPolicy.MaxReplicas != nil && - paddleJob.Spec.ElasticPolicy.MinReplicas != nil { - return - } else if paddleJob.Spec.ElasticPolicy.MaxReplicas != nil { - // Set MinRepliacs to elasticPolicy.MaxReplicas. - paddleJob.Spec.ElasticPolicy.MinReplicas = paddleJob.Spec.ElasticPolicy.MaxReplicas - } else if paddleJob.Spec.ElasticPolicy.MinReplicas != nil { - paddleJob.Spec.ElasticPolicy.MaxReplicas = paddleJob.Spec.ElasticPolicy.MinReplicas - } else { - workerReplicas := paddleJob.Spec.PaddleReplicaSpecs[PaddleJobReplicaTypeWorker].Replicas - // Set Min and Max to worker.spec.Replicas. - paddleJob.Spec.ElasticPolicy.MaxReplicas = workerReplicas - paddleJob.Spec.ElasticPolicy.MinReplicas = workerReplicas - } - } -} - -// setPaddleTypeNamesToCamelCase sets the name of all replica types from any case to correct case. 
-func setPaddleTypeNamesToCamelCase(paddleJob *PaddleJob) { - replicaTypes := []ReplicaType{ - PaddleJobReplicaTypeMaster, - PaddleJobReplicaTypeWorker, - } - for _, replicaType := range replicaTypes { - setTypeNameToCamelCase(paddleJob.Spec.PaddleReplicaSpecs, replicaType) - } -} - -// SetDefaults_PaddleJob sets any unspecified values to defaults. -func SetDefaults_PaddleJob(job *PaddleJob) { - // Set default cleanpod policy to None. - if job.Spec.RunPolicy.CleanPodPolicy == nil { - job.Spec.RunPolicy.CleanPodPolicy = CleanPodPolicyPointer(CleanPodPolicyNone) - } - - // Update the key of PaddleReplicaSpecs to camel case. - setPaddleTypeNamesToCamelCase(job) - - for _, spec := range job.Spec.PaddleReplicaSpecs { - setDefaultReplicas(spec, 1) - setDefaultRestartPolicy(spec, PaddleJobDefaultRestartPolicy) - setPaddleDefaultPort(&spec.Template.Spec) - } - // Set default elastic policy. - setPaddleElasticPolicy(job) -} diff --git a/pkg/apis/kubeflow.org/v1/paddlepaddle_types.go b/pkg/apis/kubeflow.org/v1/paddlepaddle_types.go deleted file mode 100644 index 67f6d46d29..0000000000 --- a/pkg/apis/kubeflow.org/v1/paddlepaddle_types.go +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2022 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package v1 - -import ( - autoscalingv2 "k8s.io/api/autoscaling/v2" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -const ( - // PaddleJobDefaultPortName is name of the port used to communicate between Master and - // workers. - PaddleJobDefaultPortName = "master" - // PaddleJobDefaultContainerName is the name of the PaddleJob container. - PaddleJobDefaultContainerName = "paddle" - // PaddleJobDefaultPort is default value of the port. - PaddleJobDefaultPort = 36543 - // PaddleJobDefaultRestartPolicy is default RestartPolicy for PaddleReplicaSpec. - PaddleJobDefaultRestartPolicy = RestartPolicyOnFailure - // PaddleJobKind is the kind name. - PaddleJobKind = "PaddleJob" - // PaddleJobPlural is the PaddlePlural for paddleJob. - PaddleJobPlural = "paddlejobs" - // PaddleJobSingular is the singular for paddleJob. - PaddleJobSingular = "paddlejob" - // PaddleJobFrameworkName is the name of the ML Framework - PaddleJobFrameworkName = "paddle" - // PaddleJobReplicaTypeMaster is the type of Master of distributed Paddle - PaddleJobReplicaTypeMaster ReplicaType = "Master" - // PaddleJobReplicaTypeWorker is the type for workers of distributed Paddle. - PaddleJobReplicaTypeWorker ReplicaType = "Worker" -) - -// +genclient -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=paddlejob -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status -//+kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.conditions[-1:].type` -//+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` -// +kubebuilder:subresource:scale:specpath=.spec.paddleReplicaSpecs.Worker.replicas,statuspath=.status.replicaStatuses.Worker.active,selectorpath=.status.replicaStatuses.Worker.selector - -// PaddleJob Represents a PaddleJob resource. -type PaddleJob struct { - // Standard Kubernetes type metadata. 
- metav1.TypeMeta `json:",inline"` - - metav1.ObjectMeta `json:"metadata,omitempty"` - - // Specification of the desired state of the PaddleJob. - Spec PaddleJobSpec `json:"spec,omitempty"` - - // Most recently observed status of the PaddleJob. - // Read-only (modified by the system). - Status JobStatus `json:"status,omitempty"` -} - -// PaddleJobSpec is a desired state description of the PaddleJob. -type PaddleJobSpec struct { - // RunPolicy encapsulates various runtime policies of the distributed training - // job, for example how to clean up resources and how long the job can stay - // active. - //+kubebuilder:validation:Optional - RunPolicy RunPolicy `json:"runPolicy"` - - // ElasticPolicy holds the elastic policy for paddle job. - ElasticPolicy *PaddleElasticPolicy `json:"elasticPolicy,omitempty"` - - // A map of PaddleReplicaType (type) to ReplicaSpec (value). Specifies the Paddle cluster configuration. - // For example, - // { - // "Master": PaddleReplicaSpec, - // "Worker": PaddleReplicaSpec, - // } - PaddleReplicaSpecs map[ReplicaType]*ReplicaSpec `json:"paddleReplicaSpecs"` -} - -type PaddleElasticPolicy struct { - // minReplicas is the lower limit for the number of replicas to which the training job - // can scale down. It defaults to null. - // +optional - MinReplicas *int32 `json:"minReplicas,omitempty"` - // upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. - // +optional - MaxReplicas *int32 `json:"maxReplicas,omitempty"` - - // MaxRestarts is the limit for restart times of pods in elastic mode. - // +optional - MaxRestarts *int32 `json:"maxRestarts,omitempty"` - - // Metrics contains the specifications which are used to calculate the - // desired replica count (the maximum replica count across all metrics will - // be used). The desired replica count is calculated with multiplying the - // ratio between the target value and the current value by the current - // number of pods. 
Ergo, metrics used must decrease as the pod count is - // increased, and vice-versa. See the individual metric source types for - // more information about how each type of metric must respond. - // If not set, the HPA will not be created. - // +optional - Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"` -} - -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=paddlejobs -//+kubebuilder:object:root=true - -// PaddleJobList is a list of PaddleJobs. -type PaddleJobList struct { - // Standard type metadata. - metav1.TypeMeta `json:",inline"` - - // Standard list metadata. - metav1.ListMeta `json:"metadata,omitempty"` - - // List of PaddleJobs. - Items []PaddleJob `json:"items"` -} - -func init() { - SchemeBuilder.Register(&PaddleJob{}, &PaddleJobList{}) - SchemeBuilder.SchemeBuilder.Register(addPaddleDefaultingFuncs) -} diff --git a/pkg/apis/kubeflow.org/v1/pytorch_defaults.go b/pkg/apis/kubeflow.org/v1/pytorch_defaults.go deleted file mode 100644 index 1ec2b5f641..0000000000 --- a/pkg/apis/kubeflow.org/v1/pytorch_defaults.go +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package v1 - -import ( - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -var ( - DefaultNprocPerNode = "auto" -) - -func addPyTorchDefaultingFuncs(scheme *runtime.Scheme) error { - return RegisterDefaults(scheme) -} - -// setPyTorchDefaultPort sets the default ports for pytorch container. -func setPyTorchDefaultPort(spec *corev1.PodSpec) { - index := getDefaultContainerIndex(spec, PyTorchJobDefaultContainerName) - if ok := hasDefaultPort(spec, index, PyTorchJobDefaultPortName); !ok { - setDefaultPort(spec, PyTorchJobDefaultPortName, PyTorchJobDefaultPort, index) - } -} - -func setElasticPolicy(pytorchJob *PyTorchJob) { - if pytorchJob.Spec.ElasticPolicy != nil { - if pytorchJob.Spec.ElasticPolicy.MaxReplicas != nil && - pytorchJob.Spec.ElasticPolicy.MinReplicas != nil { - return - } else if pytorchJob.Spec.ElasticPolicy.MaxReplicas != nil { - // Set MinRepliacs to elasticPolicy.MaxReplicas. - pytorchJob.Spec.ElasticPolicy.MinReplicas = pytorchJob.Spec.ElasticPolicy.MaxReplicas - } else if pytorchJob.Spec.ElasticPolicy.MinReplicas != nil { - pytorchJob.Spec.ElasticPolicy.MaxReplicas = pytorchJob.Spec.ElasticPolicy.MinReplicas - } else { - workerReplicas := pytorchJob.Spec.PyTorchReplicaSpecs[PyTorchJobReplicaTypeWorker].Replicas - // Set Min and Max to worker.spec.Replicas. - pytorchJob.Spec.ElasticPolicy.MaxReplicas = workerReplicas - pytorchJob.Spec.ElasticPolicy.MinReplicas = workerReplicas - } - } -} - -// setPyTorchTypeNamesToCamelCase sets the name of all replica types from any case to correct case. 
-func setPyTorchTypeNamesToCamelCase(pytorchJob *PyTorchJob) { - replicaTypes := []ReplicaType{ - PyTorchJobReplicaTypeMaster, - PyTorchJobReplicaTypeWorker, - } - for _, replicaType := range replicaTypes { - setTypeNameToCamelCase(pytorchJob.Spec.PyTorchReplicaSpecs, replicaType) - } -} - -func setDefaultNprocPerNode(job *PyTorchJob) { - if (job.Spec.ElasticPolicy != nil && job.Spec.ElasticPolicy.NProcPerNode == nil) || (job.Spec.ElasticPolicy == nil) { - if job.Spec.NprocPerNode == nil { - job.Spec.NprocPerNode = &DefaultNprocPerNode - } - } -} - -// SetDefaults_PyTorchJob sets any unspecified values to defaults. -func SetDefaults_PyTorchJob(job *PyTorchJob) { - // Set default cleanpod policy to None. - if job.Spec.RunPolicy.CleanPodPolicy == nil { - job.Spec.RunPolicy.CleanPodPolicy = CleanPodPolicyPointer(CleanPodPolicyNone) - } - - // Update the key of PyTorchReplicaSpecs to camel case. - setPyTorchTypeNamesToCamelCase(job) - - for _, spec := range job.Spec.PyTorchReplicaSpecs { - setDefaultReplicas(spec, 1) - setDefaultRestartPolicy(spec, PyTorchJobDefaultRestartPolicy) - setPyTorchDefaultPort(&spec.Template.Spec) - } - // Set default elastic policy. - setElasticPolicy(job) - - // Set default nproc_per_node. 
- setDefaultNprocPerNode(job) -} diff --git a/pkg/apis/kubeflow.org/v1/pytorch_defaults_test.go b/pkg/apis/kubeflow.org/v1/pytorch_defaults_test.go deleted file mode 100644 index 9cf2423589..0000000000 --- a/pkg/apis/kubeflow.org/v1/pytorch_defaults_test.go +++ /dev/null @@ -1,192 +0,0 @@ -package v1 - -import ( - "testing" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" - "k8s.io/utils/ptr" -) - -func TestSetElasticPolicy(t *testing.T) { - gomega.RegisterFailHandler(ginkgo.Fail) - - type args struct { - job *PyTorchJob - } - type result struct { - expectedMinReplicas *int32 - expectedMaxReplicas *int32 - } - tests := []struct { - name string - args args - result result - }{ - { - name: "minReplicas and maxReplicas to null", - args: args{ - job: &PyTorchJob{ - Spec: PyTorchJobSpec{ - ElasticPolicy: &ElasticPolicy{}, - PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - }, - result: result{ - expectedMinReplicas: ptr.To[int32](1), - expectedMaxReplicas: ptr.To[int32](1), - }, - }, - { - name: "minReplicas and maxReplicas to 1", - args: args{ - job: &PyTorchJob{ - Spec: PyTorchJobSpec{ - ElasticPolicy: &ElasticPolicy{ - MaxReplicas: ptr.To[int32](1), - MinReplicas: ptr.To[int32](1), - }, - PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - }, - result: result{ - expectedMinReplicas: ptr.To[int32](1), - expectedMaxReplicas: ptr.To[int32](1), - }, - }, - { - name: "minReplicas and maxReplicas to 1", - args: args{ - job: &PyTorchJob{ - Spec: PyTorchJobSpec{ - ElasticPolicy: &ElasticPolicy{ - MaxReplicas: ptr.To[int32](1), - MinReplicas: ptr.To[int32](1), - }, - PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - }, - result: result{ - expectedMinReplicas: ptr.To[int32](1), - expectedMaxReplicas: 
ptr.To[int32](1), - }, - }, - { - name: "minReplicas to null, maxRepliacs to 1", - args: args{ - job: &PyTorchJob{ - Spec: PyTorchJobSpec{ - ElasticPolicy: &ElasticPolicy{ - MaxReplicas: ptr.To[int32](1), - MinReplicas: nil, - }, - PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - }, - result: result{ - expectedMinReplicas: ptr.To[int32](1), - expectedMaxReplicas: ptr.To[int32](1), - }, - }, - { - name: "maxRepliacs to null, minReplicas to 1", - args: args{ - job: &PyTorchJob{ - Spec: PyTorchJobSpec{ - ElasticPolicy: &ElasticPolicy{ - MaxReplicas: nil, - MinReplicas: ptr.To[int32](1), - }, - PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - }, - result: result{ - expectedMinReplicas: ptr.To[int32](1), - expectedMaxReplicas: ptr.To[int32](1), - }, - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - setElasticPolicy(test.args.job) - if test.result.expectedMinReplicas != nil { - gomega.Expect(test.args.job.Spec.ElasticPolicy.MinReplicas). - To(gomega.Equal(test.result.expectedMinReplicas)) - } else { - gomega.Expect(test.args.job.Spec.ElasticPolicy.MinReplicas). - To(gomega.BeNil()) - } - - if test.result.expectedMaxReplicas != nil { - gomega.Expect(test.args.job.Spec.ElasticPolicy.MaxReplicas). - To(gomega.Equal(test.result.expectedMaxReplicas)) - } else { - gomega.Expect(test.args.job.Spec.ElasticPolicy.MaxReplicas). 
- To(gomega.BeNil()) - } - }) - } -} - -func TestSetDefaultNprocPerNode(t *testing.T) { - gomega.RegisterFailHandler(ginkgo.Fail) - t.Run("test default nproc per node", func(t *testing.T) { - job := &PyTorchJob{ - Spec: PyTorchJobSpec{ - ElasticPolicy: &ElasticPolicy{ - NProcPerNode: nil, - }, - PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - } - - setDefaultNprocPerNode(job) - gomega.Expect(job.Spec.NprocPerNode). - To(gomega.Equal(&DefaultNprocPerNode)) - }) - t.Run("test default nproc per node", func(t *testing.T) { - job := &PyTorchJob{ - Spec: PyTorchJobSpec{ - ElasticPolicy: nil, - PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - } - - setDefaultNprocPerNode(job) - gomega.Expect(job.Spec.NprocPerNode). - To(gomega.Equal(&DefaultNprocPerNode)) - }) -} diff --git a/pkg/apis/kubeflow.org/v1/pytorch_types.go b/pkg/apis/kubeflow.org/v1/pytorch_types.go deleted file mode 100644 index 59d58c3b90..0000000000 --- a/pkg/apis/kubeflow.org/v1/pytorch_types.go +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2020 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package v1 - -import ( - autoscalingv2 "k8s.io/api/autoscaling/v2" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -const ( - // PyTorchJobDefaultPortName is name of the port used to communicate between Master and - // workers. - PyTorchJobDefaultPortName = "pytorchjob-port" - // PyTorchJobDefaultContainerName is the name of the PyTorchJob container. - PyTorchJobDefaultContainerName = "pytorch" - // PyTorchJobDefaultPort is default value of the port. - PyTorchJobDefaultPort = 23456 - // PyTorchJobDefaultRestartPolicy is default RestartPolicy for PyTorchReplicaSpec. - PyTorchJobDefaultRestartPolicy = RestartPolicyOnFailure - // PyTorchJobKind is the kind name. - PyTorchJobKind = "PyTorchJob" - // PyTorchJobPlural is the PyTorchPlural for pytorchJob. - PyTorchJobPlural = "pytorchjobs" - // PyTorchJobSingular is the singular for pytorchJob. - PyTorchJobSingular = "pytorchjob" - // PyTorchJobFrameworkName is the name of the ML Framework - PyTorchJobFrameworkName = "pytorch" - // PyTorchJobReplicaTypeMaster is the type of Master of distributed PyTorch - PyTorchJobReplicaTypeMaster ReplicaType = "Master" - // PyTorchJobReplicaTypeWorker is the type for workers of distributed PyTorch. - PyTorchJobReplicaTypeWorker ReplicaType = "Worker" -) - -// +genclient -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=pytorchjob -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status -//+kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.conditions[-1:].type` -//+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` -// +kubebuilder:subresource:scale:specpath=.spec.pytorchReplicaSpecs.Worker.replicas,statuspath=.status.replicaStatuses.Worker.active,selectorpath=.status.replicaStatuses.Worker.selector - -// PyTorchJob Represents a PyTorchJob resource. -type PyTorchJob struct { - // Standard Kubernetes type metadata. 
- metav1.TypeMeta `json:",inline"` - - metav1.ObjectMeta `json:"metadata,omitempty"` - - // Specification of the desired state of the PyTorchJob. - Spec PyTorchJobSpec `json:"spec,omitempty"` - - // Most recently observed status of the PyTorchJob. - // Read-only (modified by the system). - Status JobStatus `json:"status,omitempty"` -} - -// For PyTorch launch/run related spec declaration, please see the following doc for more detail: -// https://pytorch.org/docs/stable/elastic/run.html -// Or run command `torchrun --help` for a brief description. - -// PyTorchJobSpec is a desired state description of the PyTorchJob. -type PyTorchJobSpec struct { - // RunPolicy encapsulates various runtime policies of the distributed training - // job, for example how to clean up resources and how long the job can stay - // active. - //+kubebuilder:validation:Optional - RunPolicy RunPolicy `json:"runPolicy"` - - ElasticPolicy *ElasticPolicy `json:"elasticPolicy,omitempty"` - - // A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. - // For example, - // { - // "Master": PyTorchReplicaSpec, - // "Worker": PyTorchReplicaSpec, - // } - PyTorchReplicaSpecs map[ReplicaType]*ReplicaSpec `json:"pytorchReplicaSpecs"` - - // Number of workers per node; supported values: [auto, cpu, gpu, int]. - // For more, https://github.com/pytorch/pytorch/blob/26f7f470df64d90e092081e39507e4ac751f55d6/torch/distributed/run.py#L629-L658. - // Defaults to auto. - NprocPerNode *string `json:"nprocPerNode,omitempty"` -} - -type ElasticPolicy struct { - // minReplicas is the lower limit for the number of replicas to which the training job - // can scale down. It defaults to null. - // +optional - MinReplicas *int32 `json:"minReplicas,omitempty"` - // upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. 
- // +optional - MaxReplicas *int32 `json:"maxReplicas,omitempty"` - - RDZVBackend *RDZVBackend `json:"rdzvBackend,omitempty"` - RDZVPort *int32 `json:"rdzvPort,omitempty"` - RDZVHost *string `json:"rdzvHost,omitempty"` - RDZVID *string `json:"rdzvId,omitempty"` - // RDZVConf contains additional rendezvous configuration (=,=,...). - RDZVConf []RDZVConf `json:"rdzvConf,omitempty"` - // Start a local standalone rendezvous backend that is represented by a C10d TCP store - // on port 29400. Useful when launching single-node, multi-worker job. If specified - // --rdzv_backend, --rdzv_endpoint, --rdzv_id are auto-assigned; any explicitly set values - // are ignored. - Standalone *bool `json:"standalone,omitempty"` - // Number of workers per node; supported values: [auto, cpu, gpu, int]. - // Deprecated: This API is deprecated in v1.7+ - // Use .spec.nprocPerNode instead. - NProcPerNode *int32 `json:"nProcPerNode,omitempty"` - - MaxRestarts *int32 `json:"maxRestarts,omitempty"` - - // Metrics contains the specifications which are used to calculate the - // desired replica count (the maximum replica count across all metrics will - // be used). The desired replica count is calculated with multiplying the - // ratio between the target value and the current value by the current - // number of pods. Ergo, metrics used must decrease as the pod count is - // increased, and vice-versa. See the individual metric source types for - // more information about how each type of metric must respond. - // If not set, the HPA will not be created. - // +optional - Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"` -} - -type RDZVConf struct { - Key string `json:"key,omitempty"` - Value string `json:"value,omitempty"` -} - -type RDZVBackend string - -const ( - // BackendC10D is the rendezvous backend type for C10d. - BackendC10D RDZVBackend = "c10d" - // BackendETCD is the rendezvous backend type for ETCD. 
- BackendETCD RDZVBackend = "etcd" - // BackendETCDV2 is the rendezvous backend type for ETCD v2. - BackendETCDV2 RDZVBackend = "etcd-v2" -) - -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=pytorchjobs -//+kubebuilder:object:root=true - -// PyTorchJobList is a list of PyTorchJobs. -type PyTorchJobList struct { - // Standard type metadata. - metav1.TypeMeta `json:",inline"` - - // Standard list metadata. - metav1.ListMeta `json:"metadata,omitempty"` - - // List of PyTorchJobs. - Items []PyTorchJob `json:"items"` -} - -func init() { - SchemeBuilder.Register(&PyTorchJob{}, &PyTorchJobList{}) - SchemeBuilder.SchemeBuilder.Register(addPyTorchDefaultingFuncs) -} diff --git a/pkg/apis/kubeflow.org/v1/tensorflow_defaults.go b/pkg/apis/kubeflow.org/v1/tensorflow_defaults.go deleted file mode 100644 index 7872c96bac..0000000000 --- a/pkg/apis/kubeflow.org/v1/tensorflow_defaults.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -// addTensorflowDefaultingFuncs is used to register default funcs -func addTensorflowDefaultingFuncs(scheme *runtime.Scheme) error { - return RegisterDefaults(scheme) -} - -// setTensorflowDefaultPort sets the default ports for tensorflow container. 
-func setTensorflowDefaultPort(spec *corev1.PodSpec) { - index := getDefaultContainerIndex(spec, TFJobDefaultContainerName) - if ok := hasDefaultPort(spec, index, TFJobDefaultPortName); !ok { - setDefaultPort(spec, TFJobDefaultPortName, TFJobDefaultPort, index) - } -} - -// setTensorflowTypeNamesToCamelCase sets the name of all replica types from any case to correct case. -func setTensorflowTypeNamesToCamelCase(tfJob *TFJob) { - replicaTypes := []ReplicaType{ - TFJobReplicaTypePS, - TFJobReplicaTypeWorker, - TFJobReplicaTypeChief, - TFJobReplicaTypeMaster, - TFJobReplicaTypeEval, - } - for _, replicaType := range replicaTypes { - setTypeNameToCamelCase(tfJob.Spec.TFReplicaSpecs, replicaType) - } -} - -// SetDefaults_TFJob sets any unspecified values to defaults. -func SetDefaults_TFJob(tfJob *TFJob) { - // Set default cleanpod policy to None. - if tfJob.Spec.RunPolicy.CleanPodPolicy == nil { - tfJob.Spec.RunPolicy.CleanPodPolicy = CleanPodPolicyPointer(CleanPodPolicyNone) - } - // Set default success policy to "". - if tfJob.Spec.SuccessPolicy == nil { - defaultPolicy := SuccessPolicyDefault - tfJob.Spec.SuccessPolicy = &defaultPolicy - } - - // Update the key of TFReplicaSpecs to camel case. - setTensorflowTypeNamesToCamelCase(tfJob) - - for _, spec := range tfJob.Spec.TFReplicaSpecs { - // Set default replicas to 1. - setDefaultReplicas(spec, 1) - // Set default restartPolicy - setDefaultRestartPolicy(spec, TFJobDefaultRestartPolicy) - // Set default port to tensorFlow container. 
- setTensorflowDefaultPort(&spec.Template.Spec) - } -} diff --git a/pkg/apis/kubeflow.org/v1/tensorflow_defaults_test.go b/pkg/apis/kubeflow.org/v1/tensorflow_defaults_test.go deleted file mode 100644 index d77a0de2ad..0000000000 --- a/pkg/apis/kubeflow.org/v1/tensorflow_defaults_test.go +++ /dev/null @@ -1,272 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - "reflect" - "testing" - - corev1 "k8s.io/api/core/v1" - "k8s.io/utils/ptr" -) - -func expectedTFJob(cleanPodPolicy CleanPodPolicy, restartPolicy RestartPolicy, portName string, port int32) *TFJob { - var ports []corev1.ContainerPort - - // port not set - if portName != "" { - ports = append(ports, - corev1.ContainerPort{ - Name: portName, - ContainerPort: port, - }, - ) - } - - // port set with custom name - if portName != TFJobDefaultPortName { - ports = append(ports, - corev1.ContainerPort{ - Name: TFJobDefaultPortName, - ContainerPort: TFJobDefaultPort, - }, - ) - } - - defaultSuccessPolicy := SuccessPolicyDefault - - return &TFJob{ - Spec: TFJobSpec{ - SuccessPolicy: &defaultSuccessPolicy, - RunPolicy: RunPolicy{ - CleanPodPolicy: &cleanPodPolicy, - }, - TFReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - TFJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - RestartPolicy: restartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: TFJobDefaultContainerName, - 
Image: testImage, - Ports: ports, - }, - }, - }, - }, - }, - }, - }, - } -} - -func TestSetTypeNames(t *testing.T) { - spec := &ReplicaSpec{ - RestartPolicy: RestartPolicyAlways, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: TFJobDefaultContainerName, - Image: testImage, - Ports: []corev1.ContainerPort{ - { - Name: TFJobDefaultPortName, - ContainerPort: TFJobDefaultPort, - }, - }, - }, - }, - }, - }, - } - - workerUpperCase := ReplicaType("WORKER") - original := &TFJob{ - Spec: TFJobSpec{ - TFReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - workerUpperCase: spec, - }, - }, - } - - setTensorflowTypeNamesToCamelCase(original) - if _, ok := original.Spec.TFReplicaSpecs[workerUpperCase]; ok { - t.Errorf("Failed to delete key %s", workerUpperCase) - } - if _, ok := original.Spec.TFReplicaSpecs[TFJobReplicaTypeWorker]; !ok { - t.Errorf("Failed to set key %s", TFJobReplicaTypeWorker) - } -} - -func TestSetDefaultTFJob(t *testing.T) { - customPortName := "customPort" - var customPort int32 = 1234 - customRestartPolicy := RestartPolicyAlways - - testCases := map[string]struct { - original *TFJob - expected *TFJob - }{ - "set replicas": { - original: &TFJob{ - Spec: TFJobSpec{ - TFReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - TFJobReplicaTypeWorker: { - RestartPolicy: customRestartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: TFJobDefaultContainerName, - Image: testImage, - Ports: []corev1.ContainerPort{ - { - Name: TFJobDefaultPortName, - ContainerPort: TFJobDefaultPort, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedTFJob(CleanPodPolicyNone, customRestartPolicy, TFJobDefaultPortName, TFJobDefaultPort), - }, - "set replicas with default restartpolicy": { - original: &TFJob{ - Spec: TFJobSpec{ - TFReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - TFJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - 
Containers: []corev1.Container{ - { - Name: TFJobDefaultContainerName, - Image: testImage, - Ports: []corev1.ContainerPort{ - { - Name: TFJobDefaultPortName, - ContainerPort: TFJobDefaultPort, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedTFJob(CleanPodPolicyNone, TFJobDefaultRestartPolicy, TFJobDefaultPortName, TFJobDefaultPort), - }, - "set replicas with default port": { - original: &TFJob{ - Spec: TFJobSpec{ - TFReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - TFJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - RestartPolicy: customRestartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: TFJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedTFJob(CleanPodPolicyNone, customRestartPolicy, "", 0), - }, - "set replicas adding default port": { - original: &TFJob{ - Spec: TFJobSpec{ - TFReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - TFJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - RestartPolicy: customRestartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: TFJobDefaultContainerName, - Image: testImage, - Ports: []corev1.ContainerPort{ - { - Name: customPortName, - ContainerPort: customPort, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedTFJob(CleanPodPolicyNone, customRestartPolicy, customPortName, customPort), - }, - "set custom cleanpod policy": { - original: &TFJob{ - Spec: TFJobSpec{ - RunPolicy: RunPolicy{ - CleanPodPolicy: CleanPodPolicyPointer(CleanPodPolicyAll), - }, - TFReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - TFJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - RestartPolicy: customRestartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: TFJobDefaultContainerName, - Image: testImage, - Ports: []corev1.ContainerPort{ - { - Name: customPortName, - 
ContainerPort: customPort, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedTFJob(CleanPodPolicyAll, customRestartPolicy, customPortName, customPort), - }, - } - - for name, tc := range testCases { - SetDefaults_TFJob(tc.original) - if !reflect.DeepEqual(tc.original, tc.expected) { - t.Errorf("%s: Want\n%v; Got\n %v", name, tc.expected, tc.original) - } - } -} diff --git a/pkg/apis/kubeflow.org/v1/tensorflow_types.go b/pkg/apis/kubeflow.org/v1/tensorflow_types.go deleted file mode 100644 index 7ee0a81d3a..0000000000 --- a/pkg/apis/kubeflow.org/v1/tensorflow_types.go +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2020 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -const ( - // TFJobDefaultPortName is name of the port used to communicate between PS and - // workers. - TFJobDefaultPortName = "tfjob-port" - // TFJobDefaultContainerName is the name of the TFJob container. - TFJobDefaultContainerName = "tensorflow" - // TFJobDefaultPort is default value of the port. - TFJobDefaultPort = 2222 - // TFJobDefaultRestartPolicy is default RestartPolicy for TFReplicaSpec. - TFJobDefaultRestartPolicy = RestartPolicyNever - // TFJobKind is the kind name. - TFJobKind = "TFJob" - // TFJobPlural is the TensorflowPlural for TFJob. - TFJobPlural = "tfjobs" - // TFJobSingular is the singular for TFJob. 
- TFJobSingular = "tfjob" - // TFJobFrameworkName is the name of the ML Framework - TFJobFrameworkName = "tensorflow" -) - -// +genclient -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=tfjob -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status -//+kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.conditions[-1:].type` -//+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` - -// TFJob represents a TFJob resource. -type TFJob struct { - // Standard Kubernetes type metadata. - metav1.TypeMeta `json:",inline"` - - // +optional - metav1.ObjectMeta `json:"metadata,omitempty"` - - // Specification of the desired state of the TFJob. - // +optional - Spec TFJobSpec `json:"spec,omitempty"` - - // Most recently observed status of the TFJob. - // Populated by the system. - // Read-only. - // +optional - Status JobStatus `json:"status,omitempty"` -} - -// TFJobSpec is a desired state description of the TFJob. -type TFJobSpec struct { - // RunPolicy encapsulates various runtime policies of the distributed training - // job, for example how to clean up resources and how long the job can stay - // active. - //+kubebuilder:validation:Optional - RunPolicy RunPolicy `json:"runPolicy"` - - // SuccessPolicy defines the policy to mark the TFJob as succeeded. - // Default to "", using the default rules. - // +optional - SuccessPolicy *SuccessPolicy `json:"successPolicy,omitempty"` - - // A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. - // For example, - // { - // "PS": ReplicaSpec, - // "Worker": ReplicaSpec, - // } - TFReplicaSpecs map[ReplicaType]*ReplicaSpec `json:"tfReplicaSpecs"` - - // A switch to enable dynamic worker - EnableDynamicWorker bool `json:"enableDynamicWorker,omitempty"` -} - -// SuccessPolicy is the success policy. 
-type SuccessPolicy string - -const ( - SuccessPolicyDefault SuccessPolicy = "" - SuccessPolicyAllWorkers SuccessPolicy = "AllWorkers" -) - -// TFReplicaType is the type for TFReplica. Can be one of: "Chief"/"Master" (semantically equivalent), -// "Worker", "PS", or "Evaluator". - -const ( - // TFJobReplicaTypePS is the type for parameter servers of distributed TensorFlow. - TFJobReplicaTypePS ReplicaType = "PS" - - // TFJobReplicaTypeWorker is the type for workers of distributed TensorFlow. - // This is also used for non-distributed TensorFlow. - TFJobReplicaTypeWorker ReplicaType = "Worker" - - // TFJobReplicaTypeChief is the type for chief worker of distributed TensorFlow. - // If there is "chief" replica type, it's the "chief worker". - // Else, worker:0 is the chief worker. - TFJobReplicaTypeChief ReplicaType = "Chief" - - // TFJobReplicaTypeMaster is the type for master worker of distributed TensorFlow. - // This is similar to chief, and kept just for backwards compatibility. - TFJobReplicaTypeMaster ReplicaType = "Master" - - // TFJobReplicaTypeEval is the type for evaluation replica in TensorFlow. - TFJobReplicaTypeEval ReplicaType = "Evaluator" -) - -// IsChiefOrMaster returns true if the type is Master or Chief. -func IsChiefOrMaster(typ ReplicaType) bool { - return typ == TFJobReplicaTypeChief || typ == TFJobReplicaTypeMaster -} - -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=tfjobs -// +kubebuilder:object:root=true - -// TFJobList is a list of TFJobs. -type TFJobList struct { - // Standard type metadata. - metav1.TypeMeta `json:",inline"` - - // Standard list metadata. - // +optional - metav1.ListMeta `json:"metadata,omitempty"` - - // List of TFJobs. 
- Items []TFJob `json:"items"` -} - -func init() { - SchemeBuilder.Register(&TFJob{}, &TFJobList{}) - SchemeBuilder.SchemeBuilder.Register(addTensorflowDefaultingFuncs) -} diff --git a/pkg/apis/kubeflow.org/v1/tensorflow_types_test.go b/pkg/apis/kubeflow.org/v1/tensorflow_types_test.go deleted file mode 100644 index 4f4e289843..0000000000 --- a/pkg/apis/kubeflow.org/v1/tensorflow_types_test.go +++ /dev/null @@ -1,45 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package v1 - -import "testing" - -func TestIsChiefOrMaster(t *testing.T) { - tc := []struct { - Type ReplicaType - Expected bool - }{ - { - Type: TFJobReplicaTypeChief, - Expected: true, - }, - { - Type: TFJobReplicaTypeMaster, - Expected: true, - }, - { - Type: TFJobReplicaTypeWorker, - Expected: false, - }, - } - for _, c := range tc { - actual := IsChiefOrMaster(c.Type) - if actual != c.Expected { - t.Errorf("Expected %v; Got %v", c.Expected, actual) - } - } -} diff --git a/pkg/apis/kubeflow.org/v1/validation_test_utils.go b/pkg/apis/kubeflow.org/v1/validation_test_utils.go deleted file mode 100644 index e6d9ae2314..0000000000 --- a/pkg/apis/kubeflow.org/v1/validation_test_utils.go +++ /dev/null @@ -1,25 +0,0 @@ -package v1 - -import ( - "encoding/json" - "fmt" - - log "github.com/sirupsen/logrus" -) - -const ( - testImage = "test-image:latest" -) - -// pformat returns a pretty format output of any value that can be marshaled to JSON. 
-func pformat(value interface{}) string { - if s, ok := value.(string); ok { - return s - } - valueJSON, err := json.MarshalIndent(value, "", " ") - if err != nil { - log.Warningf("Couldn't pretty format %v, error: %v", value, err) - return fmt.Sprintf("%v", value) - } - return string(valueJSON) -} diff --git a/pkg/apis/kubeflow.org/v1/xgboost_defaults.go b/pkg/apis/kubeflow.org/v1/xgboost_defaults.go deleted file mode 100644 index c1b7233720..0000000000 --- a/pkg/apis/kubeflow.org/v1/xgboost_defaults.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -func addXGBoostJobDefaultingFuncs(scheme *runtime.Scheme) error { - return RegisterDefaults(scheme) -} - -// setXGBoostJobDefaultPort sets the default ports for xgboost container. -func setXGBoostJobDefaultPort(spec *corev1.PodSpec) { - index := getDefaultContainerIndex(spec, XGBoostJobDefaultContainerName) - if ok := hasDefaultPort(spec, index, XGBoostJobDefaultPortName); !ok { - setDefaultPort(spec, XGBoostJobDefaultPortName, XGBoostJobDefaultPort, index) - } -} - -// setXGBoostJobTypeNamesToCamelCase sets the name of all replica types from any case to correct case. 
-func setXGBoostJobTypeNamesToCamelCase(xgboostJob *XGBoostJob) { - replicaTypes := []ReplicaType{ - XGBoostJobReplicaTypeMaster, - XGBoostJobReplicaTypeWorker, - } - for _, replicaType := range replicaTypes { - setTypeNameToCamelCase(xgboostJob.Spec.XGBReplicaSpecs, replicaType) - } -} - -// SetDefaults_XGBoostJob sets any unspecified values to defaults. -func SetDefaults_XGBoostJob(xgboostJob *XGBoostJob) { - // Set default cleanpod policy to None. - if xgboostJob.Spec.RunPolicy.CleanPodPolicy == nil { - xgboostJob.Spec.RunPolicy.CleanPodPolicy = CleanPodPolicyPointer(CleanPodPolicyNone) - } - - // Update the key of XGBoostReplicaSpecs to camel case. - setXGBoostJobTypeNamesToCamelCase(xgboostJob) - - for _, spec := range xgboostJob.Spec.XGBReplicaSpecs { - // Set default replicas to 1. - setDefaultReplicas(spec, 1) - // Set default restartPolicy - setDefaultRestartPolicy(spec, XGBoostJobDefaultRestartPolicy) - // Set default port to xgboost container. - setXGBoostJobDefaultPort(&spec.Template.Spec) - } -} diff --git a/pkg/apis/kubeflow.org/v1/xgboost_defaults_test.go b/pkg/apis/kubeflow.org/v1/xgboost_defaults_test.go deleted file mode 100644 index 4f1ddbc342..0000000000 --- a/pkg/apis/kubeflow.org/v1/xgboost_defaults_test.go +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package v1 - -import ( - "reflect" - "testing" - - corev1 "k8s.io/api/core/v1" - "k8s.io/utils/ptr" -) - -func expectedXGBoostJob(cleanPodPolicy CleanPodPolicy, restartPolicy RestartPolicy, replicas int32, portName string, port int32) *XGBoostJob { - var ports []corev1.ContainerPort - - // port not set - if portName != "" { - ports = append(ports, - corev1.ContainerPort{ - Name: portName, - ContainerPort: port, - }, - ) - } - - // port set with custom name - if portName != XGBoostJobDefaultPortName { - ports = append(ports, - corev1.ContainerPort{ - Name: XGBoostJobDefaultPortName, - ContainerPort: XGBoostJobDefaultPort, - }, - ) - } - - return &XGBoostJob{ - Spec: XGBoostJobSpec{ - RunPolicy: RunPolicy{ - CleanPodPolicy: &cleanPodPolicy, - }, - XGBReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - XGBoostJobReplicaTypeWorker: { - Replicas: ptr.To[int32](replicas), - RestartPolicy: restartPolicy, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: XGBoostJobDefaultContainerName, - Image: testImage, - Ports: ports, - }, - }, - }, - }, - }, - }, - }, - } -} - -func TestSetDefaults_XGBoostJob(t *testing.T) { - testCases := map[string]struct { - original *XGBoostJob - expected *XGBoostJob - }{ - "set spec with minimum setting": { - original: &XGBoostJob{ - Spec: XGBoostJobSpec{ - XGBReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - XGBoostJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: XGBoostJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedXGBoostJob(CleanPodPolicyNone, XGBoostJobDefaultRestartPolicy, 1, XGBoostJobDefaultPortName, XGBoostJobDefaultPort), - }, - "Set spec with restart policy": { - original: &XGBoostJob{ - Spec: XGBoostJobSpec{ - XGBReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - XGBoostJobReplicaTypeWorker: { - RestartPolicy: RestartPolicyOnFailure, - Template: 
corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: XGBoostJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedXGBoostJob(CleanPodPolicyNone, RestartPolicyOnFailure, 1, XGBoostJobDefaultPortName, XGBoostJobDefaultPort), - }, - "Set spec with replicas": { - original: &XGBoostJob{ - Spec: XGBoostJobSpec{ - XGBReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - XGBoostJobReplicaTypeWorker: { - Replicas: ptr.To[int32](3), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: XGBoostJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedXGBoostJob(CleanPodPolicyNone, XGBoostJobDefaultRestartPolicy, 3, XGBoostJobDefaultPortName, XGBoostJobDefaultPort), - }, - - "Set spec with default node port name and port": { - original: &XGBoostJob{ - Spec: XGBoostJobSpec{ - XGBReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - XGBoostJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: XGBoostJobDefaultContainerName, - Image: testImage, - Ports: []corev1.ContainerPort{ - { - Name: XGBoostJobDefaultPortName, - ContainerPort: XGBoostJobDefaultPort, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedXGBoostJob(CleanPodPolicyNone, XGBoostJobDefaultRestartPolicy, 1, XGBoostJobDefaultPortName, XGBoostJobDefaultPort), - }, - - "Set spec with node port": { - original: &XGBoostJob{ - Spec: XGBoostJobSpec{ - XGBReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - XGBoostJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: XGBoostJobDefaultContainerName, - Image: testImage, - Ports: []corev1.ContainerPort{ - { - Name: XGBoostJobDefaultPortName, - ContainerPort: 9999, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expected: 
expectedXGBoostJob(CleanPodPolicyNone, XGBoostJobDefaultRestartPolicy, 1, XGBoostJobDefaultPortName, 9999), - }, - "set spec with cleanpod policy": { - original: &XGBoostJob{ - Spec: XGBoostJobSpec{ - RunPolicy: RunPolicy{ - CleanPodPolicy: CleanPodPolicyPointer(CleanPodPolicyAll), - }, - XGBReplicaSpecs: map[ReplicaType]*ReplicaSpec{ - XGBoostJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: XGBoostJobDefaultContainerName, - Image: testImage, - }, - }, - }, - }, - }, - }, - }, - }, - expected: expectedXGBoostJob(CleanPodPolicyAll, XGBoostJobDefaultRestartPolicy, 1, XGBoostJobDefaultPortName, XGBoostJobDefaultPort), - }, - } - - for name, tc := range testCases { - SetDefaults_XGBoostJob(tc.original) - if !reflect.DeepEqual(tc.original, tc.expected) { - t.Errorf("%s: Want\n%v; Got\n %v", name, pformat(tc.expected), pformat(tc.original)) - } - } - -} diff --git a/pkg/apis/kubeflow.org/v1/xgboost_types.go b/pkg/apis/kubeflow.org/v1/xgboost_types.go deleted file mode 100644 index 53039aa310..0000000000 --- a/pkg/apis/kubeflow.org/v1/xgboost_types.go +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! -// NOTE: json tags are required. 
Any new fields you add must have json tags for the fields to be serialized. - -const ( - // XGBoostJobDefaultPortName is name of the port used to communicate between Master and Workers. - XGBoostJobDefaultPortName = "xgboostjob-port" - // XGBoostJobDefaultContainerName is the name of the XGBoostJob container. - XGBoostJobDefaultContainerName = "xgboost" - // XGBoostJobDefaultPort is default value of the port. - XGBoostJobDefaultPort = 9999 - // XGBoostJobDefaultRestartPolicy is default RestartPolicy for XGBReplicaSpecs. - XGBoostJobDefaultRestartPolicy = RestartPolicyNever - // XGBoostJobKind is the kind name. - XGBoostJobKind = "XGBoostJob" - // XGBoostJobPlural is the XGBoostJobPlural for XGBoostJob. - XGBoostJobPlural = "xgboostjobs" - // XGBoostJobSingular is the singular for XGBoostJob. - XGBoostJobSingular = "xgboostjob" - // XGBoostJobFrameworkName is the name of the ML Framework - XGBoostJobFrameworkName = "xgboost" - // XGBoostJobReplicaTypeMaster is the type for master replica. - XGBoostJobReplicaTypeMaster ReplicaType = "Master" - // XGBoostJobReplicaTypeWorker is the type for worker replicas. 
- XGBoostJobReplicaTypeWorker ReplicaType = "Worker" -) - -// XGBoostJobSpec defines the desired state of XGBoostJob -type XGBoostJobSpec struct { - // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster - // Important: Run "make" to regenerate code after modifying this file - //+kubebuilder:validation:Optional - RunPolicy RunPolicy `json:"runPolicy"` - - XGBReplicaSpecs map[ReplicaType]*ReplicaSpec `json:"xgbReplicaSpecs"` -} - -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status -//+kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.conditions[-1:].type` -//+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` -// +genclient -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object - -// XGBoostJob is the Schema for the xgboostjobs API -// +k8s:openapi-gen=true -type XGBoostJob struct { - metav1.TypeMeta `json:",inline"` - metav1.ObjectMeta `json:"metadata,omitempty"` - - Spec XGBoostJobSpec `json:"spec,omitempty"` - Status JobStatus `json:"status,omitempty"` -} - -//+kubebuilder:object:root=true -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object - -// XGBoostJobList contains a list of XGBoostJob -type XGBoostJobList struct { - metav1.TypeMeta `json:",inline"` - metav1.ListMeta `json:"metadata,omitempty"` - Items []XGBoostJob `json:"items"` -} - -func init() { - SchemeBuilder.Register(&XGBoostJob{}, &XGBoostJobList{}) - SchemeBuilder.SchemeBuilder.Register(addXGBoostJobDefaultingFuncs) -} diff --git a/pkg/apis/kubeflow.org/v1/zz_generated.deepcopy.go b/pkg/apis/kubeflow.org/v1/zz_generated.deepcopy.go deleted file mode 100644 index 3153e31a7b..0000000000 --- a/pkg/apis/kubeflow.org/v1/zz_generated.deepcopy.go +++ /dev/null @@ -1,942 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in 
compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by deepcopy-gen. DO NOT EDIT. - -package v1 - -import ( - v2 "k8s.io/api/autoscaling/v2" - corev1 "k8s.io/api/core/v1" - resource "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" -) - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ElasticPolicy) DeepCopyInto(out *ElasticPolicy) { - *out = *in - if in.MinReplicas != nil { - in, out := &in.MinReplicas, &out.MinReplicas - *out = new(int32) - **out = **in - } - if in.MaxReplicas != nil { - in, out := &in.MaxReplicas, &out.MaxReplicas - *out = new(int32) - **out = **in - } - if in.RDZVBackend != nil { - in, out := &in.RDZVBackend, &out.RDZVBackend - *out = new(RDZVBackend) - **out = **in - } - if in.RDZVPort != nil { - in, out := &in.RDZVPort, &out.RDZVPort - *out = new(int32) - **out = **in - } - if in.RDZVHost != nil { - in, out := &in.RDZVHost, &out.RDZVHost - *out = new(string) - **out = **in - } - if in.RDZVID != nil { - in, out := &in.RDZVID, &out.RDZVID - *out = new(string) - **out = **in - } - if in.RDZVConf != nil { - in, out := &in.RDZVConf, &out.RDZVConf - *out = make([]RDZVConf, len(*in)) - copy(*out, *in) - } - if in.Standalone != nil { - in, out := &in.Standalone, &out.Standalone - *out = new(bool) - **out = **in - } - if in.NProcPerNode != nil { - in, out := &in.NProcPerNode, &out.NProcPerNode - *out = new(int32) - **out = **in - } - if in.MaxRestarts != nil { - in, out := 
&in.MaxRestarts, &out.MaxRestarts - *out = new(int32) - **out = **in - } - if in.Metrics != nil { - in, out := &in.Metrics, &out.Metrics - *out = make([]v2.MetricSpec, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticPolicy. -func (in *ElasticPolicy) DeepCopy() *ElasticPolicy { - if in == nil { - return nil - } - out := new(ElasticPolicy) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *JAXJob) DeepCopyInto(out *JAXJob) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JAXJob. -func (in *JAXJob) DeepCopy() *JAXJob { - if in == nil { - return nil - } - out := new(JAXJob) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *JAXJob) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *JAXJobList) DeepCopyInto(out *JAXJobList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]JAXJob, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JAXJobList. 
-func (in *JAXJobList) DeepCopy() *JAXJobList { - if in == nil { - return nil - } - out := new(JAXJobList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *JAXJobList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *JAXJobSpec) DeepCopyInto(out *JAXJobSpec) { - *out = *in - in.RunPolicy.DeepCopyInto(&out.RunPolicy) - if in.JAXReplicaSpecs != nil { - in, out := &in.JAXReplicaSpecs, &out.JAXReplicaSpecs - *out = make(map[ReplicaType]*ReplicaSpec, len(*in)) - for key, val := range *in { - var outVal *ReplicaSpec - if val == nil { - (*out)[key] = nil - } else { - in, out := &val, &outVal - *out = new(ReplicaSpec) - (*in).DeepCopyInto(*out) - } - (*out)[key] = outVal - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JAXJobSpec. -func (in *JAXJobSpec) DeepCopy() *JAXJobSpec { - if in == nil { - return nil - } - out := new(JAXJobSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *JobCondition) DeepCopyInto(out *JobCondition) { - *out = *in - in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) - in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobCondition. -func (in *JobCondition) DeepCopy() *JobCondition { - if in == nil { - return nil - } - out := new(JobCondition) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *JobStatus) DeepCopyInto(out *JobStatus) { - *out = *in - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]JobCondition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.ReplicaStatuses != nil { - in, out := &in.ReplicaStatuses, &out.ReplicaStatuses - *out = make(map[ReplicaType]*ReplicaStatus, len(*in)) - for key, val := range *in { - var outVal *ReplicaStatus - if val == nil { - (*out)[key] = nil - } else { - in, out := &val, &outVal - *out = new(ReplicaStatus) - (*in).DeepCopyInto(*out) - } - (*out)[key] = outVal - } - } - if in.StartTime != nil { - in, out := &in.StartTime, &out.StartTime - *out = (*in).DeepCopy() - } - if in.CompletionTime != nil { - in, out := &in.CompletionTime, &out.CompletionTime - *out = (*in).DeepCopy() - } - if in.LastReconcileTime != nil { - in, out := &in.LastReconcileTime, &out.LastReconcileTime - *out = (*in).DeepCopy() - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobStatus. -func (in *JobStatus) DeepCopy() *JobStatus { - if in == nil { - return nil - } - out := new(JobStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MPIJob) DeepCopyInto(out *MPIJob) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJob. -func (in *MPIJob) DeepCopy() *MPIJob { - if in == nil { - return nil - } - out := new(MPIJob) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
-func (in *MPIJob) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MPIJobList) DeepCopyInto(out *MPIJobList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]MPIJob, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobList. -func (in *MPIJobList) DeepCopy() *MPIJobList { - if in == nil { - return nil - } - out := new(MPIJobList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MPIJobList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MPIJobSpec) DeepCopyInto(out *MPIJobSpec) { - *out = *in - if in.SlotsPerWorker != nil { - in, out := &in.SlotsPerWorker, &out.SlotsPerWorker - *out = new(int32) - **out = **in - } - if in.CleanPodPolicy != nil { - in, out := &in.CleanPodPolicy, &out.CleanPodPolicy - *out = new(CleanPodPolicy) - **out = **in - } - if in.MPIReplicaSpecs != nil { - in, out := &in.MPIReplicaSpecs, &out.MPIReplicaSpecs - *out = make(map[ReplicaType]*ReplicaSpec, len(*in)) - for key, val := range *in { - var outVal *ReplicaSpec - if val == nil { - (*out)[key] = nil - } else { - in, out := &val, &outVal - *out = new(ReplicaSpec) - (*in).DeepCopyInto(*out) - } - (*out)[key] = outVal - } - } - in.RunPolicy.DeepCopyInto(&out.RunPolicy) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobSpec. 
-func (in *MPIJobSpec) DeepCopy() *MPIJobSpec { - if in == nil { - return nil - } - out := new(MPIJobSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PaddleElasticPolicy) DeepCopyInto(out *PaddleElasticPolicy) { - *out = *in - if in.MinReplicas != nil { - in, out := &in.MinReplicas, &out.MinReplicas - *out = new(int32) - **out = **in - } - if in.MaxReplicas != nil { - in, out := &in.MaxReplicas, &out.MaxReplicas - *out = new(int32) - **out = **in - } - if in.MaxRestarts != nil { - in, out := &in.MaxRestarts, &out.MaxRestarts - *out = new(int32) - **out = **in - } - if in.Metrics != nil { - in, out := &in.Metrics, &out.Metrics - *out = make([]v2.MetricSpec, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleElasticPolicy. -func (in *PaddleElasticPolicy) DeepCopy() *PaddleElasticPolicy { - if in == nil { - return nil - } - out := new(PaddleElasticPolicy) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PaddleJob) DeepCopyInto(out *PaddleJob) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleJob. -func (in *PaddleJob) DeepCopy() *PaddleJob { - if in == nil { - return nil - } - out := new(PaddleJob) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
-func (in *PaddleJob) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PaddleJobList) DeepCopyInto(out *PaddleJobList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]PaddleJob, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleJobList. -func (in *PaddleJobList) DeepCopy() *PaddleJobList { - if in == nil { - return nil - } - out := new(PaddleJobList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *PaddleJobList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PaddleJobSpec) DeepCopyInto(out *PaddleJobSpec) { - *out = *in - in.RunPolicy.DeepCopyInto(&out.RunPolicy) - if in.ElasticPolicy != nil { - in, out := &in.ElasticPolicy, &out.ElasticPolicy - *out = new(PaddleElasticPolicy) - (*in).DeepCopyInto(*out) - } - if in.PaddleReplicaSpecs != nil { - in, out := &in.PaddleReplicaSpecs, &out.PaddleReplicaSpecs - *out = make(map[ReplicaType]*ReplicaSpec, len(*in)) - for key, val := range *in { - var outVal *ReplicaSpec - if val == nil { - (*out)[key] = nil - } else { - in, out := &val, &outVal - *out = new(ReplicaSpec) - (*in).DeepCopyInto(*out) - } - (*out)[key] = outVal - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PaddleJobSpec. 
-func (in *PaddleJobSpec) DeepCopy() *PaddleJobSpec { - if in == nil { - return nil - } - out := new(PaddleJobSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PyTorchJob) DeepCopyInto(out *PyTorchJob) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJob. -func (in *PyTorchJob) DeepCopy() *PyTorchJob { - if in == nil { - return nil - } - out := new(PyTorchJob) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *PyTorchJob) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PyTorchJobList) DeepCopyInto(out *PyTorchJobList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]PyTorchJob, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJobList. -func (in *PyTorchJobList) DeepCopy() *PyTorchJobList { - if in == nil { - return nil - } - out := new(PyTorchJobList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
-func (in *PyTorchJobList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PyTorchJobSpec) DeepCopyInto(out *PyTorchJobSpec) { - *out = *in - in.RunPolicy.DeepCopyInto(&out.RunPolicy) - if in.ElasticPolicy != nil { - in, out := &in.ElasticPolicy, &out.ElasticPolicy - *out = new(ElasticPolicy) - (*in).DeepCopyInto(*out) - } - if in.PyTorchReplicaSpecs != nil { - in, out := &in.PyTorchReplicaSpecs, &out.PyTorchReplicaSpecs - *out = make(map[ReplicaType]*ReplicaSpec, len(*in)) - for key, val := range *in { - var outVal *ReplicaSpec - if val == nil { - (*out)[key] = nil - } else { - in, out := &val, &outVal - *out = new(ReplicaSpec) - (*in).DeepCopyInto(*out) - } - (*out)[key] = outVal - } - } - if in.NprocPerNode != nil { - in, out := &in.NprocPerNode, &out.NprocPerNode - *out = new(string) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJobSpec. -func (in *PyTorchJobSpec) DeepCopy() *PyTorchJobSpec { - if in == nil { - return nil - } - out := new(PyTorchJobSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *RDZVConf) DeepCopyInto(out *RDZVConf) { - *out = *in - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RDZVConf. -func (in *RDZVConf) DeepCopy() *RDZVConf { - if in == nil { - return nil - } - out := new(RDZVConf) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *ReplicaSpec) DeepCopyInto(out *ReplicaSpec) { - *out = *in - if in.Replicas != nil { - in, out := &in.Replicas, &out.Replicas - *out = new(int32) - **out = **in - } - in.Template.DeepCopyInto(&out.Template) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicaSpec. -func (in *ReplicaSpec) DeepCopy() *ReplicaSpec { - if in == nil { - return nil - } - out := new(ReplicaSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ReplicaStatus) DeepCopyInto(out *ReplicaStatus) { - *out = *in - if in.LabelSelector != nil { - in, out := &in.LabelSelector, &out.LabelSelector - *out = new(metav1.LabelSelector) - (*in).DeepCopyInto(*out) - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicaStatus. -func (in *ReplicaStatus) DeepCopy() *ReplicaStatus { - if in == nil { - return nil - } - out := new(ReplicaStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *RunPolicy) DeepCopyInto(out *RunPolicy) { - *out = *in - if in.CleanPodPolicy != nil { - in, out := &in.CleanPodPolicy, &out.CleanPodPolicy - *out = new(CleanPodPolicy) - **out = **in - } - if in.TTLSecondsAfterFinished != nil { - in, out := &in.TTLSecondsAfterFinished, &out.TTLSecondsAfterFinished - *out = new(int32) - **out = **in - } - if in.ActiveDeadlineSeconds != nil { - in, out := &in.ActiveDeadlineSeconds, &out.ActiveDeadlineSeconds - *out = new(int64) - **out = **in - } - if in.BackoffLimit != nil { - in, out := &in.BackoffLimit, &out.BackoffLimit - *out = new(int32) - **out = **in - } - if in.SchedulingPolicy != nil { - in, out := &in.SchedulingPolicy, &out.SchedulingPolicy - *out = new(SchedulingPolicy) - (*in).DeepCopyInto(*out) - } - if in.Suspend != nil { - in, out := &in.Suspend, &out.Suspend - *out = new(bool) - **out = **in - } - if in.ManagedBy != nil { - in, out := &in.ManagedBy, &out.ManagedBy - *out = new(string) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunPolicy. -func (in *RunPolicy) DeepCopy() *RunPolicy { - if in == nil { - return nil - } - out := new(RunPolicy) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *SchedulingPolicy) DeepCopyInto(out *SchedulingPolicy) { - *out = *in - if in.MinAvailable != nil { - in, out := &in.MinAvailable, &out.MinAvailable - *out = new(int32) - **out = **in - } - if in.MinResources != nil { - in, out := &in.MinResources, &out.MinResources - *out = new(map[corev1.ResourceName]resource.Quantity) - if **in != nil { - in, out := *in, *out - *out = make(map[corev1.ResourceName]resource.Quantity, len(*in)) - for key, val := range *in { - (*out)[key] = val.DeepCopy() - } - } - } - if in.ScheduleTimeoutSeconds != nil { - in, out := &in.ScheduleTimeoutSeconds, &out.ScheduleTimeoutSeconds - *out = new(int32) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingPolicy. -func (in *SchedulingPolicy) DeepCopy() *SchedulingPolicy { - if in == nil { - return nil - } - out := new(SchedulingPolicy) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *TFJob) DeepCopyInto(out *TFJob) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFJob. -func (in *TFJob) DeepCopy() *TFJob { - if in == nil { - return nil - } - out := new(TFJob) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *TFJob) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *TFJobList) DeepCopyInto(out *TFJobList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]TFJob, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFJobList. -func (in *TFJobList) DeepCopy() *TFJobList { - if in == nil { - return nil - } - out := new(TFJobList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *TFJobList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *TFJobSpec) DeepCopyInto(out *TFJobSpec) { - *out = *in - in.RunPolicy.DeepCopyInto(&out.RunPolicy) - if in.SuccessPolicy != nil { - in, out := &in.SuccessPolicy, &out.SuccessPolicy - *out = new(SuccessPolicy) - **out = **in - } - if in.TFReplicaSpecs != nil { - in, out := &in.TFReplicaSpecs, &out.TFReplicaSpecs - *out = make(map[ReplicaType]*ReplicaSpec, len(*in)) - for key, val := range *in { - var outVal *ReplicaSpec - if val == nil { - (*out)[key] = nil - } else { - in, out := &val, &outVal - *out = new(ReplicaSpec) - (*in).DeepCopyInto(*out) - } - (*out)[key] = outVal - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFJobSpec. -func (in *TFJobSpec) DeepCopy() *TFJobSpec { - if in == nil { - return nil - } - out := new(TFJobSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *XGBoostJob) DeepCopyInto(out *XGBoostJob) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJob. -func (in *XGBoostJob) DeepCopy() *XGBoostJob { - if in == nil { - return nil - } - out := new(XGBoostJob) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *XGBoostJob) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *XGBoostJobList) DeepCopyInto(out *XGBoostJobList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]XGBoostJob, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJobList. -func (in *XGBoostJobList) DeepCopy() *XGBoostJobList { - if in == nil { - return nil - } - out := new(XGBoostJobList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *XGBoostJobList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *XGBoostJobSpec) DeepCopyInto(out *XGBoostJobSpec) { - *out = *in - in.RunPolicy.DeepCopyInto(&out.RunPolicy) - if in.XGBReplicaSpecs != nil { - in, out := &in.XGBReplicaSpecs, &out.XGBReplicaSpecs - *out = make(map[ReplicaType]*ReplicaSpec, len(*in)) - for key, val := range *in { - var outVal *ReplicaSpec - if val == nil { - (*out)[key] = nil - } else { - in, out := &val, &outVal - *out = new(ReplicaSpec) - (*in).DeepCopyInto(*out) - } - (*out)[key] = outVal - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJobSpec. -func (in *XGBoostJobSpec) DeepCopy() *XGBoostJobSpec { - if in == nil { - return nil - } - out := new(XGBoostJobSpec) - in.DeepCopyInto(out) - return out -} diff --git a/pkg/apis/kubeflow.org/v1/zz_generated.defaults.go b/pkg/apis/kubeflow.org/v1/zz_generated.defaults.go deleted file mode 100644 index 0f0c399ad8..0000000000 --- a/pkg/apis/kubeflow.org/v1/zz_generated.defaults.go +++ /dev/null @@ -1,109 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by defaulter-gen. DO NOT EDIT. - -package v1 - -import ( - runtime "k8s.io/apimachinery/pkg/runtime" -) - -// RegisterDefaults adds defaulters functions to the given scheme. -// Public to allow building arbitrary schemes. 
-// All generated defaulters are covering - they call all nested defaulters. -func RegisterDefaults(scheme *runtime.Scheme) error { - scheme.AddTypeDefaultingFunc(&JAXJob{}, func(obj interface{}) { SetObjectDefaults_JAXJob(obj.(*JAXJob)) }) - scheme.AddTypeDefaultingFunc(&JAXJobList{}, func(obj interface{}) { SetObjectDefaults_JAXJobList(obj.(*JAXJobList)) }) - scheme.AddTypeDefaultingFunc(&MPIJob{}, func(obj interface{}) { SetObjectDefaults_MPIJob(obj.(*MPIJob)) }) - scheme.AddTypeDefaultingFunc(&MPIJobList{}, func(obj interface{}) { SetObjectDefaults_MPIJobList(obj.(*MPIJobList)) }) - scheme.AddTypeDefaultingFunc(&PaddleJob{}, func(obj interface{}) { SetObjectDefaults_PaddleJob(obj.(*PaddleJob)) }) - scheme.AddTypeDefaultingFunc(&PaddleJobList{}, func(obj interface{}) { SetObjectDefaults_PaddleJobList(obj.(*PaddleJobList)) }) - scheme.AddTypeDefaultingFunc(&PyTorchJob{}, func(obj interface{}) { SetObjectDefaults_PyTorchJob(obj.(*PyTorchJob)) }) - scheme.AddTypeDefaultingFunc(&PyTorchJobList{}, func(obj interface{}) { SetObjectDefaults_PyTorchJobList(obj.(*PyTorchJobList)) }) - scheme.AddTypeDefaultingFunc(&TFJob{}, func(obj interface{}) { SetObjectDefaults_TFJob(obj.(*TFJob)) }) - scheme.AddTypeDefaultingFunc(&TFJobList{}, func(obj interface{}) { SetObjectDefaults_TFJobList(obj.(*TFJobList)) }) - scheme.AddTypeDefaultingFunc(&XGBoostJob{}, func(obj interface{}) { SetObjectDefaults_XGBoostJob(obj.(*XGBoostJob)) }) - scheme.AddTypeDefaultingFunc(&XGBoostJobList{}, func(obj interface{}) { SetObjectDefaults_XGBoostJobList(obj.(*XGBoostJobList)) }) - return nil -} - -func SetObjectDefaults_JAXJob(in *JAXJob) { - SetDefaults_JAXJob(in) -} - -func SetObjectDefaults_JAXJobList(in *JAXJobList) { - for i := range in.Items { - a := &in.Items[i] - SetObjectDefaults_JAXJob(a) - } -} - -func SetObjectDefaults_MPIJob(in *MPIJob) { - SetDefaults_MPIJob(in) -} - -func SetObjectDefaults_MPIJobList(in *MPIJobList) { - for i := range in.Items { - a := &in.Items[i] - 
SetObjectDefaults_MPIJob(a) - } -} - -func SetObjectDefaults_PaddleJob(in *PaddleJob) { - SetDefaults_PaddleJob(in) -} - -func SetObjectDefaults_PaddleJobList(in *PaddleJobList) { - for i := range in.Items { - a := &in.Items[i] - SetObjectDefaults_PaddleJob(a) - } -} - -func SetObjectDefaults_PyTorchJob(in *PyTorchJob) { - SetDefaults_PyTorchJob(in) -} - -func SetObjectDefaults_PyTorchJobList(in *PyTorchJobList) { - for i := range in.Items { - a := &in.Items[i] - SetObjectDefaults_PyTorchJob(a) - } -} - -func SetObjectDefaults_TFJob(in *TFJob) { - SetDefaults_TFJob(in) -} - -func SetObjectDefaults_TFJobList(in *TFJobList) { - for i := range in.Items { - a := &in.Items[i] - SetObjectDefaults_TFJob(a) - } -} - -func SetObjectDefaults_XGBoostJob(in *XGBoostJob) { - SetDefaults_XGBoostJob(in) -} - -func SetObjectDefaults_XGBoostJobList(in *XGBoostJobList) { - for i := range in.Items { - a := &in.Items[i] - SetObjectDefaults_XGBoostJob(a) - } -} diff --git a/pkg/apis/kubeflow.org/v1/zz_generated.openapi.go b/pkg/apis/kubeflow.org/v1/zz_generated.openapi.go deleted file mode 100644 index 7e3cac6dd1..0000000000 --- a/pkg/apis/kubeflow.org/v1/zz_generated.openapi.go +++ /dev/null @@ -1,1412 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by openapi-gen. DO NOT EDIT. 
- -package v1 - -import ( - common "k8s.io/kube-openapi/pkg/common" - spec "k8s.io/kube-openapi/pkg/validation/spec" -) - -func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition { - return map[string]common.OpenAPIDefinition{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ElasticPolicy": schema_pkg_apis_kubefloworg_v1_ElasticPolicy(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JAXJob": schema_pkg_apis_kubefloworg_v1_JAXJob(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JAXJobList": schema_pkg_apis_kubefloworg_v1_JAXJobList(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JAXJobSpec": schema_pkg_apis_kubefloworg_v1_JAXJobSpec(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobCondition": schema_pkg_apis_kubefloworg_v1_JobCondition(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus": schema_pkg_apis_kubefloworg_v1_JobStatus(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.MPIJob": schema_pkg_apis_kubefloworg_v1_MPIJob(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.MPIJobList": schema_pkg_apis_kubefloworg_v1_MPIJobList(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.MPIJobSpec": schema_pkg_apis_kubefloworg_v1_MPIJobSpec(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleElasticPolicy": schema_pkg_apis_kubefloworg_v1_PaddleElasticPolicy(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleJob": schema_pkg_apis_kubefloworg_v1_PaddleJob(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleJobList": schema_pkg_apis_kubefloworg_v1_PaddleJobList(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleJobSpec": schema_pkg_apis_kubefloworg_v1_PaddleJobSpec(ref), - 
"github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PyTorchJob": schema_pkg_apis_kubefloworg_v1_PyTorchJob(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PyTorchJobList": schema_pkg_apis_kubefloworg_v1_PyTorchJobList(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PyTorchJobSpec": schema_pkg_apis_kubefloworg_v1_PyTorchJobSpec(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RDZVConf": schema_pkg_apis_kubefloworg_v1_RDZVConf(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec": schema_pkg_apis_kubefloworg_v1_ReplicaSpec(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaStatus": schema_pkg_apis_kubefloworg_v1_ReplicaStatus(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy": schema_pkg_apis_kubefloworg_v1_RunPolicy(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.SchedulingPolicy": schema_pkg_apis_kubefloworg_v1_SchedulingPolicy(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.TFJob": schema_pkg_apis_kubefloworg_v1_TFJob(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.TFJobList": schema_pkg_apis_kubefloworg_v1_TFJobList(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.TFJobSpec": schema_pkg_apis_kubefloworg_v1_TFJobSpec(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.XGBoostJob": schema_pkg_apis_kubefloworg_v1_XGBoostJob(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.XGBoostJobList": schema_pkg_apis_kubefloworg_v1_XGBoostJobList(ref), - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.XGBoostJobSpec": schema_pkg_apis_kubefloworg_v1_XGBoostJobSpec(ref), - } -} - -func schema_pkg_apis_kubefloworg_v1_ElasticPolicy(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: 
spec.SchemaProps{ - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "minReplicas": { - SchemaProps: spec.SchemaProps{ - Description: "minReplicas is the lower limit for the number of replicas to which the training job can scale down. It defaults to null.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "maxReplicas": { - SchemaProps: spec.SchemaProps{ - Description: "upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "rdzvBackend": { - SchemaProps: spec.SchemaProps{ - Type: []string{"string"}, - Format: "", - }, - }, - "rdzvPort": { - SchemaProps: spec.SchemaProps{ - Type: []string{"integer"}, - Format: "int32", - }, - }, - "rdzvHost": { - SchemaProps: spec.SchemaProps{ - Type: []string{"string"}, - Format: "", - }, - }, - "rdzvId": { - SchemaProps: spec.SchemaProps{ - Type: []string{"string"}, - Format: "", - }, - }, - "rdzvConf": { - SchemaProps: spec.SchemaProps{ - Description: "RDZVConf contains additional rendezvous configuration (=,=,...).", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RDZVConf"), - }, - }, - }, - }, - }, - "standalone": { - SchemaProps: spec.SchemaProps{ - Description: "Start a local standalone rendezvous backend that is represented by a C10d TCP store on port 29400. Useful when launching single-node, multi-worker job. If specified --rdzv_backend, --rdzv_endpoint, --rdzv_id are auto-assigned; any explicitly set values are ignored.", - Type: []string{"boolean"}, - Format: "", - }, - }, - "nProcPerNode": { - SchemaProps: spec.SchemaProps{ - Description: "Number of workers per node; supported values: [auto, cpu, gpu, int]. 
Deprecated: This API is deprecated in v1.7+ Use .spec.nprocPerNode instead.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "maxRestarts": { - SchemaProps: spec.SchemaProps{ - Type: []string{"integer"}, - Format: "int32", - }, - }, - "metrics": { - SchemaProps: spec.SchemaProps{ - Description: "Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. See the individual metric source types for more information about how each type of metric must respond. If not set, the HPA will not be created.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/api/autoscaling/v2.MetricSpec"), - }, - }, - }, - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RDZVConf", "k8s.io/api/autoscaling/v2.MetricSpec"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_JAXJob(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "JAXJob Represents a JAXJob resource.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"), - }, - }, - "spec": { - SchemaProps: spec.SchemaProps{ - Description: "Specification of the desired state of the JAXJob.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JAXJobSpec"), - }, - }, - "status": { - SchemaProps: spec.SchemaProps{ - Description: "Most recently observed status of the JAXJob. Read-only (modified by the system).", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus"), - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JAXJobSpec", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_JAXJobList(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "JAXJobList is a list of JAXJobs.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. 
Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Description: "Standard list metadata.", - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"), - }, - }, - "items": { - SchemaProps: spec.SchemaProps{ - Description: "List of JAXJobs.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JAXJob"), - }, - }, - }, - }, - }, - }, - Required: []string{"items"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JAXJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_JAXJobSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "JAXJobSpec is a desired state description of the JAXJob.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "runPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - Default: 
map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"), - }, - }, - "jaxReplicaSpecs": { - SchemaProps: spec.SchemaProps{ - Description: "A map of JAXReplicaType (type) to ReplicaSpec (value). Specifies the JAX cluster configuration. For example,\n {\n \"Worker\": JAXReplicaSpec,\n }", - Type: []string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec"), - }, - }, - }, - }, - }, - }, - Required: []string{"runPolicy", "jaxReplicaSpecs"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_JobCondition(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "JobCondition describes the state of the job at a certain point.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "type": { - SchemaProps: spec.SchemaProps{ - Description: "Type of job condition.", - Default: "", - Type: []string{"string"}, - Format: "", - }, - }, - "status": { - SchemaProps: spec.SchemaProps{ - Description: "Status of the condition, one of True, False, Unknown.", - Default: "", - Type: []string{"string"}, - Format: "", - }, - }, - "reason": { - SchemaProps: spec.SchemaProps{ - Description: "The reason for the condition's last transition.", - Type: []string{"string"}, - Format: "", - }, - }, - "message": { - SchemaProps: spec.SchemaProps{ - Description: "A human readable message indicating details about the transition.", - Type: []string{"string"}, - Format: "", - }, - }, - "lastUpdateTime": { - SchemaProps: spec.SchemaProps{ - Description: "The last time this condition 
was updated.", - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Time"), - }, - }, - "lastTransitionTime": { - SchemaProps: spec.SchemaProps{ - Description: "Last time the condition transitioned from one status to another.", - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Time"), - }, - }, - }, - Required: []string{"type", "status"}, - }, - }, - Dependencies: []string{ - "k8s.io/apimachinery/pkg/apis/meta/v1.Time"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_JobStatus(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "JobStatus represents the current observed state of the training Job.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "conditions": { - SchemaProps: spec.SchemaProps{ - Description: "Conditions is an array of current observed job conditions.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobCondition"), - }, - }, - }, - }, - }, - "replicaStatuses": { - SchemaProps: spec.SchemaProps{ - Description: "ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica.", - Type: []string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaStatus"), - }, - }, - }, - }, - }, - "startTime": { - SchemaProps: spec.SchemaProps{ - Description: "Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. 
It is represented in RFC3339 form and is in UTC.", - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Time"), - }, - }, - "completionTime": { - SchemaProps: spec.SchemaProps{ - Description: "Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Time"), - }, - }, - "lastReconcileTime": { - SchemaProps: spec.SchemaProps{ - Description: "Represents last time when the job was reconciled. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Time"), - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobCondition", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaStatus", "k8s.io/apimachinery/pkg/apis/meta/v1.Time"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_MPIJob(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"), - }, - }, - "spec": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.MPIJobSpec"), - }, - }, - "status": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus"), - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.MPIJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_MPIJobList(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"), - }, - }, - "items": { - SchemaProps: spec.SchemaProps{ - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.MPIJob"), - }, - }, - }, - }, - }, - }, - Required: []string{"items"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.MPIJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_MPIJobSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "slotsPerWorker": { - SchemaProps: spec.SchemaProps{ - Description: "Specifies the number of slots per worker used in hostfile. Defaults to 1.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "cleanPodPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "CleanPodPolicy defines the policy that whether to kill pods after the job completes. 
Defaults to None.", - Type: []string{"string"}, - Format: "", - }, - }, - "mpiReplicaSpecs": { - SchemaProps: spec.SchemaProps{ - Description: "`MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that specify the MPI replicas to run.", - Type: []string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec"), - }, - }, - }, - }, - }, - "mainContainer": { - SchemaProps: spec.SchemaProps{ - Description: "MainContainer specifies name of the main container which executes the MPI code.", - Type: []string{"string"}, - Format: "", - }, - }, - "runPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "`RunPolicy` encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"), - }, - }, - }, - Required: []string{"mpiReplicaSpecs"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_PaddleElasticPolicy(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "minReplicas": { - SchemaProps: spec.SchemaProps{ - Description: "minReplicas is the lower limit for the number of replicas to which the training job can scale down. 
It defaults to null.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "maxReplicas": { - SchemaProps: spec.SchemaProps{ - Description: "upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "maxRestarts": { - SchemaProps: spec.SchemaProps{ - Description: "MaxRestarts is the limit for restart times of pods in elastic mode.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "metrics": { - SchemaProps: spec.SchemaProps{ - Description: "Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. See the individual metric source types for more information about how each type of metric must respond. If not set, the HPA will not be created.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/api/autoscaling/v2.MetricSpec"), - }, - }, - }, - }, - }, - }, - }, - }, - Dependencies: []string{ - "k8s.io/api/autoscaling/v2.MetricSpec"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_PaddleJob(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "PaddleJob Represents a PaddleJob resource.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. 
In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"), - }, - }, - "spec": { - SchemaProps: spec.SchemaProps{ - Description: "Specification of the desired state of the PaddleJob.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleJobSpec"), - }, - }, - "status": { - SchemaProps: spec.SchemaProps{ - Description: "Most recently observed status of the PaddleJob. 
Read-only (modified by the system).", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus"), - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_PaddleJobList(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "PaddleJobList is a list of PaddleJobs.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Description: "Standard list metadata.", - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"), - }, - }, - "items": { - SchemaProps: spec.SchemaProps{ - Description: "List of PaddleJobs.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleJob"), - }, - }, - }, - }, - }, - }, - Required: []string{"items"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_PaddleJobSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "PaddleJobSpec is a desired state description of the PaddleJob.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "runPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"), - }, - }, - "elasticPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "ElasticPolicy holds the elastic policy for paddle job.", - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleElasticPolicy"), - }, - }, - "paddleReplicaSpecs": { - SchemaProps: spec.SchemaProps{ - Description: "A map of PaddleReplicaType (type) to ReplicaSpec (value). 
Specifies the Paddle cluster configuration. For example,\n {\n \"Master\": PaddleReplicaSpec,\n \"Worker\": PaddleReplicaSpec,\n }", - Type: []string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec"), - }, - }, - }, - }, - }, - }, - Required: []string{"runPolicy", "paddleReplicaSpecs"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PaddleElasticPolicy", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_PyTorchJob(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "PyTorchJob Represents a PyTorchJob resource.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"), - }, - }, - "spec": { - SchemaProps: spec.SchemaProps{ - Description: "Specification of the desired state of the PyTorchJob.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PyTorchJobSpec"), - }, - }, - "status": { - SchemaProps: spec.SchemaProps{ - Description: "Most recently observed status of the PyTorchJob. Read-only (modified by the system).", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus"), - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PyTorchJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_PyTorchJobList(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "PyTorchJobList is a list of PyTorchJobs.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Description: "Standard list metadata.", - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"), - }, - }, - "items": { - SchemaProps: spec.SchemaProps{ - Description: "List of PyTorchJobs.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PyTorchJob"), - }, - }, - }, - }, - }, - }, - Required: []string{"items"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.PyTorchJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_PyTorchJobSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "PyTorchJobSpec is a desired state description of the PyTorchJob.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "runPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"), - }, - }, - "elasticPolicy": { - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ElasticPolicy"), - }, - }, - "pytorchReplicaSpecs": { - SchemaProps: spec.SchemaProps{ - Description: "A map of PyTorchReplicaType 
(type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. For example,\n {\n \"Master\": PyTorchReplicaSpec,\n \"Worker\": PyTorchReplicaSpec,\n }", - Type: []string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec"), - }, - }, - }, - }, - }, - "nprocPerNode": { - SchemaProps: spec.SchemaProps{ - Description: "Number of workers per node; supported values: [auto, cpu, gpu, int]. For more, https://github.com/pytorch/pytorch/blob/26f7f470df64d90e092081e39507e4ac751f55d6/torch/distributed/run.py#L629-L658. Defaults to auto.", - Type: []string{"string"}, - Format: "", - }, - }, - }, - Required: []string{"runPolicy", "pytorchReplicaSpecs"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ElasticPolicy", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_RDZVConf(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "key": { - SchemaProps: spec.SchemaProps{ - Type: []string{"string"}, - Format: "", - }, - }, - "value": { - SchemaProps: spec.SchemaProps{ - Type: []string{"string"}, - Format: "", - }, - }, - }, - }, - }, - } -} - -func schema_pkg_apis_kubefloworg_v1_ReplicaSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "ReplicaSpec is a description of the replica", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "replicas": { - SchemaProps: spec.SchemaProps{ - Description: "Replicas is the 
desired number of replicas of the given template. If unspecified, defaults to 1.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "template": { - SchemaProps: spec.SchemaProps{ - Description: "Template is the object that describes the pod that will be created for this replica. RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec", - Default: map[string]interface{}{}, - Ref: ref("k8s.io/api/core/v1.PodTemplateSpec"), - }, - }, - "restartPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "Restart policy for all replicas within the job. One of Always, OnFailure, Never and ExitCode. Default to Never.", - Type: []string{"string"}, - Format: "", - }, - }, - }, - }, - }, - Dependencies: []string{ - "k8s.io/api/core/v1.PodTemplateSpec"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_ReplicaStatus(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "ReplicaStatus represents the current observed state of the replica.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "active": { - SchemaProps: spec.SchemaProps{ - Description: "The number of actively running pods.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "succeeded": { - SchemaProps: spec.SchemaProps{ - Description: "The number of pods which reached phase Succeeded.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "failed": { - SchemaProps: spec.SchemaProps{ - Description: "The number of pods which reached phase Failed.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "labelSelector": { - SchemaProps: spec.SchemaProps{ - Description: "Deprecated: Use Selector instead", - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector"), - }, - }, - "selector": { - SchemaProps: spec.SchemaProps{ - Description: "A Selector is a label query over a set of resources. 
The result of matchLabels and matchExpressions are ANDed. An empty Selector matches all objects. A null Selector matches no objects.", - Type: []string{"string"}, - Format: "", - }, - }, - }, - }, - }, - Dependencies: []string{ - "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_RunPolicy(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "cleanPodPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "CleanPodPolicy defines the policy to kill pods after the job completes. Default to None.", - Type: []string{"string"}, - Format: "", - }, - }, - "ttlSecondsAfterFinished": { - SchemaProps: spec.SchemaProps{ - Description: "TTLSecondsAfterFinished is the TTL to clean up jobs. It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Default to infinite.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "activeDeadlineSeconds": { - SchemaProps: spec.SchemaProps{ - Description: "Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer.", - Type: []string{"integer"}, - Format: "int64", - }, - }, - "backoffLimit": { - SchemaProps: spec.SchemaProps{ - Description: "Optional number of retries before marking this job failed.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "schedulingPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "SchedulingPolicy defines the policy related to scheduling, e.g. 
gang-scheduling", - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.SchedulingPolicy"), - }, - }, - "suspend": { - SchemaProps: spec.SchemaProps{ - Description: "suspend specifies whether the Job controller should create Pods or not. If a Job is created with suspend set to true, no Pods are created by the Job controller. If a Job is suspended after creation (i.e. the flag goes from false to true), the Job controller will delete all active Pods and PodGroups associated with this Job. Users must design their workload to gracefully handle this. Suspending a Job will reset the StartTime field of the Job.\n\nDefaults to false.", - Type: []string{"boolean"}, - Format: "", - }, - }, - "managedBy": { - SchemaProps: spec.SchemaProps{ - Description: "ManagedBy is used to indicate the controller or entity that manages a job. The value must be either an empty, 'kubeflow.org/training-operator' or 'kueue.x-k8s.io/multikueue'. The training-operator reconciles a job which doesn't have this field at all or the field value is the reserved string 'kubeflow.org/training-operator', but delegates reconciling the job with 'kueue.x-k8s.io/multikueue' to the Kueue. 
The field is immutable.", - Type: []string{"string"}, - Format: "", - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.SchedulingPolicy"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_SchedulingPolicy(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "SchedulingPolicy encapsulates various scheduling policies of the distributed training job, for example `minAvailable` for gang-scheduling.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "minAvailable": { - SchemaProps: spec.SchemaProps{ - Type: []string{"integer"}, - Format: "int32", - }, - }, - "queue": { - SchemaProps: spec.SchemaProps{ - Type: []string{"string"}, - Format: "", - }, - }, - "minResources": { - SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Ref: ref("k8s.io/apimachinery/pkg/api/resource.Quantity"), - }, - }, - }, - }, - }, - "priorityClass": { - SchemaProps: spec.SchemaProps{ - Type: []string{"string"}, - Format: "", - }, - }, - "scheduleTimeoutSeconds": { - SchemaProps: spec.SchemaProps{ - Type: []string{"integer"}, - Format: "int32", - }, - }, - }, - }, - }, - Dependencies: []string{ - "k8s.io/apimachinery/pkg/api/resource.Quantity"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_TFJob(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "TFJob represents a TFJob resource.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. 
Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"), - }, - }, - "spec": { - SchemaProps: spec.SchemaProps{ - Description: "Specification of the desired state of the TFJob.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.TFJobSpec"), - }, - }, - "status": { - SchemaProps: spec.SchemaProps{ - Description: "Most recently observed status of the TFJob. Populated by the system. Read-only.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus"), - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.TFJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_TFJobList(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "TFJobList is a list of TFJobs.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. 
Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Description: "Standard list metadata.", - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"), - }, - }, - "items": { - SchemaProps: spec.SchemaProps{ - Description: "List of TFJobs.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.TFJob"), - }, - }, - }, - }, - }, - }, - Required: []string{"items"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.TFJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_TFJobSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "TFJobSpec is a desired state description of the TFJob.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "runPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", - Default: 
map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"), - }, - }, - "successPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "SuccessPolicy defines the policy to mark the TFJob as succeeded. Default to \"\", using the default rules.", - Type: []string{"string"}, - Format: "", - }, - }, - "tfReplicaSpecs": { - SchemaProps: spec.SchemaProps{ - Description: "A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. For example,\n {\n \"PS\": ReplicaSpec,\n \"Worker\": ReplicaSpec,\n }", - Type: []string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec"), - }, - }, - }, - }, - }, - "enableDynamicWorker": { - SchemaProps: spec.SchemaProps{ - Description: "A switch to enable dynamic worker", - Type: []string{"boolean"}, - Format: "", - }, - }, - }, - Required: []string{"runPolicy", "tfReplicaSpecs"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_XGBoostJob(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "XGBoostJob is the Schema for the xgboostjobs API", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"), - }, - }, - "spec": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.XGBoostJobSpec"), - }, - }, - "status": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus"), - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.XGBoostJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_XGBoostJobList(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "XGBoostJobList contains a list of XGBoostJob", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"), - }, - }, - "items": { - SchemaProps: spec.SchemaProps{ - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.XGBoostJob"), - }, - }, - }, - }, - }, - }, - Required: []string{"items"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.XGBoostJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, - } -} - -func schema_pkg_apis_kubefloworg_v1_XGBoostJobSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "XGBoostJobSpec defines the desired state of XGBoostJob", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "runPolicy": { - SchemaProps: spec.SchemaProps{ - Description: "INSERT ADDITIONAL SPEC FIELDS - desired state of cluster Important: Run \"make\" to regenerate code after modifying this file", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"), - }, - }, - "xgbReplicaSpecs": { - SchemaProps: spec.SchemaProps{ - Type: 
[]string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec"), - }, - }, - }, - }, - }, - }, - Required: []string{"runPolicy", "xgbReplicaSpecs"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"}, - } -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/elasticpolicy.go b/pkg/client/applyconfiguration/kubeflow.org/v1/elasticpolicy.go deleted file mode 100644 index d47cc0282c..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/elasticpolicy.go +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - v2 "k8s.io/api/autoscaling/v2" -) - -// ElasticPolicyApplyConfiguration represents a declarative configuration of the ElasticPolicy type for use -// with apply. 
-type ElasticPolicyApplyConfiguration struct { - MinReplicas *int32 `json:"minReplicas,omitempty"` - MaxReplicas *int32 `json:"maxReplicas,omitempty"` - RDZVBackend *v1.RDZVBackend `json:"rdzvBackend,omitempty"` - RDZVPort *int32 `json:"rdzvPort,omitempty"` - RDZVHost *string `json:"rdzvHost,omitempty"` - RDZVID *string `json:"rdzvId,omitempty"` - RDZVConf []RDZVConfApplyConfiguration `json:"rdzvConf,omitempty"` - Standalone *bool `json:"standalone,omitempty"` - NProcPerNode *int32 `json:"nProcPerNode,omitempty"` - MaxRestarts *int32 `json:"maxRestarts,omitempty"` - Metrics []v2.MetricSpec `json:"metrics,omitempty"` -} - -// ElasticPolicyApplyConfiguration constructs a declarative configuration of the ElasticPolicy type for use with -// apply. -func ElasticPolicy() *ElasticPolicyApplyConfiguration { - return &ElasticPolicyApplyConfiguration{} -} - -// WithMinReplicas sets the MinReplicas field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MinReplicas field is set to the value of the last call. -func (b *ElasticPolicyApplyConfiguration) WithMinReplicas(value int32) *ElasticPolicyApplyConfiguration { - b.MinReplicas = &value - return b -} - -// WithMaxReplicas sets the MaxReplicas field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MaxReplicas field is set to the value of the last call. -func (b *ElasticPolicyApplyConfiguration) WithMaxReplicas(value int32) *ElasticPolicyApplyConfiguration { - b.MaxReplicas = &value - return b -} - -// WithRDZVBackend sets the RDZVBackend field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
-// If called multiple times, the RDZVBackend field is set to the value of the last call. -func (b *ElasticPolicyApplyConfiguration) WithRDZVBackend(value v1.RDZVBackend) *ElasticPolicyApplyConfiguration { - b.RDZVBackend = &value - return b -} - -// WithRDZVPort sets the RDZVPort field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RDZVPort field is set to the value of the last call. -func (b *ElasticPolicyApplyConfiguration) WithRDZVPort(value int32) *ElasticPolicyApplyConfiguration { - b.RDZVPort = &value - return b -} - -// WithRDZVHost sets the RDZVHost field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RDZVHost field is set to the value of the last call. -func (b *ElasticPolicyApplyConfiguration) WithRDZVHost(value string) *ElasticPolicyApplyConfiguration { - b.RDZVHost = &value - return b -} - -// WithRDZVID sets the RDZVID field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RDZVID field is set to the value of the last call. -func (b *ElasticPolicyApplyConfiguration) WithRDZVID(value string) *ElasticPolicyApplyConfiguration { - b.RDZVID = &value - return b -} - -// WithRDZVConf adds the given value to the RDZVConf field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the RDZVConf field. 
-func (b *ElasticPolicyApplyConfiguration) WithRDZVConf(values ...*RDZVConfApplyConfiguration) *ElasticPolicyApplyConfiguration { - for i := range values { - if values[i] == nil { - panic("nil value passed to WithRDZVConf") - } - b.RDZVConf = append(b.RDZVConf, *values[i]) - } - return b -} - -// WithStandalone sets the Standalone field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Standalone field is set to the value of the last call. -func (b *ElasticPolicyApplyConfiguration) WithStandalone(value bool) *ElasticPolicyApplyConfiguration { - b.Standalone = &value - return b -} - -// WithNProcPerNode sets the NProcPerNode field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the NProcPerNode field is set to the value of the last call. -func (b *ElasticPolicyApplyConfiguration) WithNProcPerNode(value int32) *ElasticPolicyApplyConfiguration { - b.NProcPerNode = &value - return b -} - -// WithMaxRestarts sets the MaxRestarts field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MaxRestarts field is set to the value of the last call. -func (b *ElasticPolicyApplyConfiguration) WithMaxRestarts(value int32) *ElasticPolicyApplyConfiguration { - b.MaxRestarts = &value - return b -} - -// WithMetrics adds the given value to the Metrics field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the Metrics field. 
-func (b *ElasticPolicyApplyConfiguration) WithMetrics(values ...v2.MetricSpec) *ElasticPolicyApplyConfiguration { - for i := range values { - b.Metrics = append(b.Metrics, values[i]) - } - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/jaxjob.go b/pkg/client/applyconfiguration/kubeflow.org/v1/jaxjob.go deleted file mode 100644 index ac20c9403a..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/jaxjob.go +++ /dev/null @@ -1,223 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - v1 "k8s.io/client-go/applyconfigurations/meta/v1" -) - -// JAXJobApplyConfiguration represents a declarative configuration of the JAXJob type for use -// with apply. -type JAXJobApplyConfiguration struct { - v1.TypeMetaApplyConfiguration `json:",inline"` - *v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"` - Spec *JAXJobSpecApplyConfiguration `json:"spec,omitempty"` - Status *JobStatusApplyConfiguration `json:"status,omitempty"` -} - -// JAXJob constructs a declarative configuration of the JAXJob type for use with -// apply. 
-func JAXJob(name, namespace string) *JAXJobApplyConfiguration { - b := &JAXJobApplyConfiguration{} - b.WithName(name) - b.WithNamespace(namespace) - b.WithKind("JAXJob") - b.WithAPIVersion("kubeflow.org/v1") - return b -} - -// WithKind sets the Kind field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Kind field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithKind(value string) *JAXJobApplyConfiguration { - b.Kind = &value - return b -} - -// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the APIVersion field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithAPIVersion(value string) *JAXJobApplyConfiguration { - b.APIVersion = &value - return b -} - -// WithName sets the Name field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Name field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithName(value string) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Name = &value - return b -} - -// WithGenerateName sets the GenerateName field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the GenerateName field is set to the value of the last call. 
-func (b *JAXJobApplyConfiguration) WithGenerateName(value string) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.GenerateName = &value - return b -} - -// WithNamespace sets the Namespace field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Namespace field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithNamespace(value string) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Namespace = &value - return b -} - -// WithUID sets the UID field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UID field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithUID(value types.UID) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.UID = &value - return b -} - -// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ResourceVersion field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithResourceVersion(value string) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.ResourceVersion = &value - return b -} - -// WithGeneration sets the Generation field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Generation field is set to the value of the last call. 
-func (b *JAXJobApplyConfiguration) WithGeneration(value int64) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Generation = &value - return b -} - -// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CreationTimestamp field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithCreationTimestamp(value metav1.Time) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.CreationTimestamp = &value - return b -} - -// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionTimestamp field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionTimestamp = &value - return b -} - -// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionGracePeriodSeconds = &value - return b -} - -// WithLabels puts the entries into the Labels field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. 
-// If called multiple times, the entries provided by each call will be put on the Labels field, -// overwriting an existing map entries in Labels field with the same key. -func (b *JAXJobApplyConfiguration) WithLabels(entries map[string]string) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Labels == nil && len(entries) > 0 { - b.Labels = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Labels[k] = v - } - return b -} - -// WithAnnotations puts the entries into the Annotations field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Annotations field, -// overwriting an existing map entries in Annotations field with the same key. -func (b *JAXJobApplyConfiguration) WithAnnotations(entries map[string]string) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Annotations == nil && len(entries) > 0 { - b.Annotations = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Annotations[k] = v - } - return b -} - -// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the OwnerReferences field. 
-func (b *JAXJobApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - if values[i] == nil { - panic("nil value passed to WithOwnerReferences") - } - b.OwnerReferences = append(b.OwnerReferences, *values[i]) - } - return b -} - -// WithFinalizers adds the given value to the Finalizers field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the Finalizers field. -func (b *JAXJobApplyConfiguration) WithFinalizers(values ...string) *JAXJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - b.Finalizers = append(b.Finalizers, values[i]) - } - return b -} - -func (b *JAXJobApplyConfiguration) ensureObjectMetaApplyConfigurationExists() { - if b.ObjectMetaApplyConfiguration == nil { - b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{} - } -} - -// WithSpec sets the Spec field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Spec field is set to the value of the last call. -func (b *JAXJobApplyConfiguration) WithSpec(value *JAXJobSpecApplyConfiguration) *JAXJobApplyConfiguration { - b.Spec = value - return b -} - -// WithStatus sets the Status field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Status field is set to the value of the last call. 
-func (b *JAXJobApplyConfiguration) WithStatus(value *JobStatusApplyConfiguration) *JAXJobApplyConfiguration { - b.Status = value - return b -} - -// GetName retrieves the value of the Name field in the declarative configuration. -func (b *JAXJobApplyConfiguration) GetName() *string { - b.ensureObjectMetaApplyConfigurationExists() - return b.Name -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/jaxjobspec.go b/pkg/client/applyconfiguration/kubeflow.org/v1/jaxjobspec.go deleted file mode 100644 index 7bd90de693..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/jaxjobspec.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// JAXJobSpecApplyConfiguration represents a declarative configuration of the JAXJobSpec type for use -// with apply. -type JAXJobSpecApplyConfiguration struct { - RunPolicy *RunPolicyApplyConfiguration `json:"runPolicy,omitempty"` - JAXReplicaSpecs map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec `json:"jaxReplicaSpecs,omitempty"` -} - -// JAXJobSpecApplyConfiguration constructs a declarative configuration of the JAXJobSpec type for use with -// apply. 
-func JAXJobSpec() *JAXJobSpecApplyConfiguration { - return &JAXJobSpecApplyConfiguration{} -} - -// WithRunPolicy sets the RunPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RunPolicy field is set to the value of the last call. -func (b *JAXJobSpecApplyConfiguration) WithRunPolicy(value *RunPolicyApplyConfiguration) *JAXJobSpecApplyConfiguration { - b.RunPolicy = value - return b -} - -// WithJAXReplicaSpecs puts the entries into the JAXReplicaSpecs field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the JAXReplicaSpecs field, -// overwriting an existing map entries in JAXReplicaSpecs field with the same key. -func (b *JAXJobSpecApplyConfiguration) WithJAXReplicaSpecs(entries map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec) *JAXJobSpecApplyConfiguration { - if b.JAXReplicaSpecs == nil && len(entries) > 0 { - b.JAXReplicaSpecs = make(map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec, len(entries)) - } - for k, v := range entries { - b.JAXReplicaSpecs[k] = v - } - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/jobcondition.go b/pkg/client/applyconfiguration/kubeflow.org/v1/jobcondition.go deleted file mode 100644 index 707ff7dd2f..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/jobcondition.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// JobConditionApplyConfiguration represents a declarative configuration of the JobCondition type for use -// with apply. -type JobConditionApplyConfiguration struct { - Type *v1.JobConditionType `json:"type,omitempty"` - Status *corev1.ConditionStatus `json:"status,omitempty"` - Reason *string `json:"reason,omitempty"` - Message *string `json:"message,omitempty"` - LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` - LastTransitionTime *metav1.Time `json:"lastTransitionTime,omitempty"` -} - -// JobConditionApplyConfiguration constructs a declarative configuration of the JobCondition type for use with -// apply. -func JobCondition() *JobConditionApplyConfiguration { - return &JobConditionApplyConfiguration{} -} - -// WithType sets the Type field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Type field is set to the value of the last call. 
-func (b *JobConditionApplyConfiguration) WithType(value v1.JobConditionType) *JobConditionApplyConfiguration { - b.Type = &value - return b -} - -// WithStatus sets the Status field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Status field is set to the value of the last call. -func (b *JobConditionApplyConfiguration) WithStatus(value corev1.ConditionStatus) *JobConditionApplyConfiguration { - b.Status = &value - return b -} - -// WithReason sets the Reason field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Reason field is set to the value of the last call. -func (b *JobConditionApplyConfiguration) WithReason(value string) *JobConditionApplyConfiguration { - b.Reason = &value - return b -} - -// WithMessage sets the Message field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Message field is set to the value of the last call. -func (b *JobConditionApplyConfiguration) WithMessage(value string) *JobConditionApplyConfiguration { - b.Message = &value - return b -} - -// WithLastUpdateTime sets the LastUpdateTime field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the LastUpdateTime field is set to the value of the last call. 
-func (b *JobConditionApplyConfiguration) WithLastUpdateTime(value metav1.Time) *JobConditionApplyConfiguration { - b.LastUpdateTime = &value - return b -} - -// WithLastTransitionTime sets the LastTransitionTime field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the LastTransitionTime field is set to the value of the last call. -func (b *JobConditionApplyConfiguration) WithLastTransitionTime(value metav1.Time) *JobConditionApplyConfiguration { - b.LastTransitionTime = &value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/jobstatus.go b/pkg/client/applyconfiguration/kubeflow.org/v1/jobstatus.go deleted file mode 100644 index 01346b6919..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/jobstatus.go +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// JobStatusApplyConfiguration represents a declarative configuration of the JobStatus type for use -// with apply. 
-type JobStatusApplyConfiguration struct { - Conditions []JobConditionApplyConfiguration `json:"conditions,omitempty"` - ReplicaStatuses map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaStatus `json:"replicaStatuses,omitempty"` - StartTime *metav1.Time `json:"startTime,omitempty"` - CompletionTime *metav1.Time `json:"completionTime,omitempty"` - LastReconcileTime *metav1.Time `json:"lastReconcileTime,omitempty"` -} - -// JobStatusApplyConfiguration constructs a declarative configuration of the JobStatus type for use with -// apply. -func JobStatus() *JobStatusApplyConfiguration { - return &JobStatusApplyConfiguration{} -} - -// WithConditions adds the given value to the Conditions field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the Conditions field. -func (b *JobStatusApplyConfiguration) WithConditions(values ...*JobConditionApplyConfiguration) *JobStatusApplyConfiguration { - for i := range values { - if values[i] == nil { - panic("nil value passed to WithConditions") - } - b.Conditions = append(b.Conditions, *values[i]) - } - return b -} - -// WithReplicaStatuses puts the entries into the ReplicaStatuses field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the ReplicaStatuses field, -// overwriting an existing map entries in ReplicaStatuses field with the same key. 
-func (b *JobStatusApplyConfiguration) WithReplicaStatuses(entries map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaStatus) *JobStatusApplyConfiguration { - if b.ReplicaStatuses == nil && len(entries) > 0 { - b.ReplicaStatuses = make(map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaStatus, len(entries)) - } - for k, v := range entries { - b.ReplicaStatuses[k] = v - } - return b -} - -// WithStartTime sets the StartTime field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the StartTime field is set to the value of the last call. -func (b *JobStatusApplyConfiguration) WithStartTime(value metav1.Time) *JobStatusApplyConfiguration { - b.StartTime = &value - return b -} - -// WithCompletionTime sets the CompletionTime field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CompletionTime field is set to the value of the last call. -func (b *JobStatusApplyConfiguration) WithCompletionTime(value metav1.Time) *JobStatusApplyConfiguration { - b.CompletionTime = &value - return b -} - -// WithLastReconcileTime sets the LastReconcileTime field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the LastReconcileTime field is set to the value of the last call. 
-func (b *JobStatusApplyConfiguration) WithLastReconcileTime(value metav1.Time) *JobStatusApplyConfiguration { - b.LastReconcileTime = &value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/mpijob.go b/pkg/client/applyconfiguration/kubeflow.org/v1/mpijob.go deleted file mode 100644 index 354c5e9fed..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/mpijob.go +++ /dev/null @@ -1,223 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - v1 "k8s.io/client-go/applyconfigurations/meta/v1" -) - -// MPIJobApplyConfiguration represents a declarative configuration of the MPIJob type for use -// with apply. -type MPIJobApplyConfiguration struct { - v1.TypeMetaApplyConfiguration `json:",inline"` - *v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"` - Spec *MPIJobSpecApplyConfiguration `json:"spec,omitempty"` - Status *JobStatusApplyConfiguration `json:"status,omitempty"` -} - -// MPIJob constructs a declarative configuration of the MPIJob type for use with -// apply. 
-func MPIJob(name, namespace string) *MPIJobApplyConfiguration { - b := &MPIJobApplyConfiguration{} - b.WithName(name) - b.WithNamespace(namespace) - b.WithKind("MPIJob") - b.WithAPIVersion("kubeflow.org/v1") - return b -} - -// WithKind sets the Kind field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Kind field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithKind(value string) *MPIJobApplyConfiguration { - b.Kind = &value - return b -} - -// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the APIVersion field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithAPIVersion(value string) *MPIJobApplyConfiguration { - b.APIVersion = &value - return b -} - -// WithName sets the Name field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Name field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithName(value string) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Name = &value - return b -} - -// WithGenerateName sets the GenerateName field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the GenerateName field is set to the value of the last call. 
-func (b *MPIJobApplyConfiguration) WithGenerateName(value string) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.GenerateName = &value - return b -} - -// WithNamespace sets the Namespace field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Namespace field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithNamespace(value string) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Namespace = &value - return b -} - -// WithUID sets the UID field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UID field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithUID(value types.UID) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.UID = &value - return b -} - -// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ResourceVersion field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithResourceVersion(value string) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.ResourceVersion = &value - return b -} - -// WithGeneration sets the Generation field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Generation field is set to the value of the last call. 
-func (b *MPIJobApplyConfiguration) WithGeneration(value int64) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Generation = &value - return b -} - -// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CreationTimestamp field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithCreationTimestamp(value metav1.Time) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.CreationTimestamp = &value - return b -} - -// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionTimestamp field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionTimestamp = &value - return b -} - -// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionGracePeriodSeconds = &value - return b -} - -// WithLabels puts the entries into the Labels field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. 
-// If called multiple times, the entries provided by each call will be put on the Labels field, -// overwriting an existing map entries in Labels field with the same key. -func (b *MPIJobApplyConfiguration) WithLabels(entries map[string]string) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Labels == nil && len(entries) > 0 { - b.Labels = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Labels[k] = v - } - return b -} - -// WithAnnotations puts the entries into the Annotations field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Annotations field, -// overwriting an existing map entries in Annotations field with the same key. -func (b *MPIJobApplyConfiguration) WithAnnotations(entries map[string]string) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Annotations == nil && len(entries) > 0 { - b.Annotations = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Annotations[k] = v - } - return b -} - -// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the OwnerReferences field. 
-func (b *MPIJobApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - if values[i] == nil { - panic("nil value passed to WithOwnerReferences") - } - b.OwnerReferences = append(b.OwnerReferences, *values[i]) - } - return b -} - -// WithFinalizers adds the given value to the Finalizers field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the Finalizers field. -func (b *MPIJobApplyConfiguration) WithFinalizers(values ...string) *MPIJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - b.Finalizers = append(b.Finalizers, values[i]) - } - return b -} - -func (b *MPIJobApplyConfiguration) ensureObjectMetaApplyConfigurationExists() { - if b.ObjectMetaApplyConfiguration == nil { - b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{} - } -} - -// WithSpec sets the Spec field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Spec field is set to the value of the last call. -func (b *MPIJobApplyConfiguration) WithSpec(value *MPIJobSpecApplyConfiguration) *MPIJobApplyConfiguration { - b.Spec = value - return b -} - -// WithStatus sets the Status field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Status field is set to the value of the last call. 
-func (b *MPIJobApplyConfiguration) WithStatus(value *JobStatusApplyConfiguration) *MPIJobApplyConfiguration { - b.Status = value - return b -} - -// GetName retrieves the value of the Name field in the declarative configuration. -func (b *MPIJobApplyConfiguration) GetName() *string { - b.ensureObjectMetaApplyConfigurationExists() - return b.Name -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/mpijobspec.go b/pkg/client/applyconfiguration/kubeflow.org/v1/mpijobspec.go deleted file mode 100644 index 3fe53440b4..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/mpijobspec.go +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// MPIJobSpecApplyConfiguration represents a declarative configuration of the MPIJobSpec type for use -// with apply. 
-type MPIJobSpecApplyConfiguration struct { - SlotsPerWorker *int32 `json:"slotsPerWorker,omitempty"` - CleanPodPolicy *v1.CleanPodPolicy `json:"cleanPodPolicy,omitempty"` - MPIReplicaSpecs map[v1.ReplicaType]*v1.ReplicaSpec `json:"mpiReplicaSpecs,omitempty"` - MainContainer *string `json:"mainContainer,omitempty"` - RunPolicy *RunPolicyApplyConfiguration `json:"runPolicy,omitempty"` -} - -// MPIJobSpecApplyConfiguration constructs a declarative configuration of the MPIJobSpec type for use with -// apply. -func MPIJobSpec() *MPIJobSpecApplyConfiguration { - return &MPIJobSpecApplyConfiguration{} -} - -// WithSlotsPerWorker sets the SlotsPerWorker field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the SlotsPerWorker field is set to the value of the last call. -func (b *MPIJobSpecApplyConfiguration) WithSlotsPerWorker(value int32) *MPIJobSpecApplyConfiguration { - b.SlotsPerWorker = &value - return b -} - -// WithCleanPodPolicy sets the CleanPodPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CleanPodPolicy field is set to the value of the last call. -func (b *MPIJobSpecApplyConfiguration) WithCleanPodPolicy(value v1.CleanPodPolicy) *MPIJobSpecApplyConfiguration { - b.CleanPodPolicy = &value - return b -} - -// WithMPIReplicaSpecs puts the entries into the MPIReplicaSpecs field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the MPIReplicaSpecs field, -// overwriting an existing map entries in MPIReplicaSpecs field with the same key. 
-func (b *MPIJobSpecApplyConfiguration) WithMPIReplicaSpecs(entries map[v1.ReplicaType]*v1.ReplicaSpec) *MPIJobSpecApplyConfiguration { - if b.MPIReplicaSpecs == nil && len(entries) > 0 { - b.MPIReplicaSpecs = make(map[v1.ReplicaType]*v1.ReplicaSpec, len(entries)) - } - for k, v := range entries { - b.MPIReplicaSpecs[k] = v - } - return b -} - -// WithMainContainer sets the MainContainer field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MainContainer field is set to the value of the last call. -func (b *MPIJobSpecApplyConfiguration) WithMainContainer(value string) *MPIJobSpecApplyConfiguration { - b.MainContainer = &value - return b -} - -// WithRunPolicy sets the RunPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RunPolicy field is set to the value of the last call. -func (b *MPIJobSpecApplyConfiguration) WithRunPolicy(value *RunPolicyApplyConfiguration) *MPIJobSpecApplyConfiguration { - b.RunPolicy = value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/paddleelasticpolicy.go b/pkg/client/applyconfiguration/kubeflow.org/v1/paddleelasticpolicy.go deleted file mode 100644 index 53e9165c1c..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/paddleelasticpolicy.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - v2 "k8s.io/api/autoscaling/v2" -) - -// PaddleElasticPolicyApplyConfiguration represents a declarative configuration of the PaddleElasticPolicy type for use -// with apply. -type PaddleElasticPolicyApplyConfiguration struct { - MinReplicas *int32 `json:"minReplicas,omitempty"` - MaxReplicas *int32 `json:"maxReplicas,omitempty"` - MaxRestarts *int32 `json:"maxRestarts,omitempty"` - Metrics []v2.MetricSpec `json:"metrics,omitempty"` -} - -// PaddleElasticPolicyApplyConfiguration constructs a declarative configuration of the PaddleElasticPolicy type for use with -// apply. -func PaddleElasticPolicy() *PaddleElasticPolicyApplyConfiguration { - return &PaddleElasticPolicyApplyConfiguration{} -} - -// WithMinReplicas sets the MinReplicas field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MinReplicas field is set to the value of the last call. -func (b *PaddleElasticPolicyApplyConfiguration) WithMinReplicas(value int32) *PaddleElasticPolicyApplyConfiguration { - b.MinReplicas = &value - return b -} - -// WithMaxReplicas sets the MaxReplicas field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MaxReplicas field is set to the value of the last call. 
-func (b *PaddleElasticPolicyApplyConfiguration) WithMaxReplicas(value int32) *PaddleElasticPolicyApplyConfiguration { - b.MaxReplicas = &value - return b -} - -// WithMaxRestarts sets the MaxRestarts field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MaxRestarts field is set to the value of the last call. -func (b *PaddleElasticPolicyApplyConfiguration) WithMaxRestarts(value int32) *PaddleElasticPolicyApplyConfiguration { - b.MaxRestarts = &value - return b -} - -// WithMetrics adds the given value to the Metrics field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the Metrics field. -func (b *PaddleElasticPolicyApplyConfiguration) WithMetrics(values ...v2.MetricSpec) *PaddleElasticPolicyApplyConfiguration { - for i := range values { - b.Metrics = append(b.Metrics, values[i]) - } - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/paddlejob.go b/pkg/client/applyconfiguration/kubeflow.org/v1/paddlejob.go deleted file mode 100644 index 51c65cb681..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/paddlejob.go +++ /dev/null @@ -1,223 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - v1 "k8s.io/client-go/applyconfigurations/meta/v1" -) - -// PaddleJobApplyConfiguration represents a declarative configuration of the PaddleJob type for use -// with apply. -type PaddleJobApplyConfiguration struct { - v1.TypeMetaApplyConfiguration `json:",inline"` - *v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"` - Spec *PaddleJobSpecApplyConfiguration `json:"spec,omitempty"` - Status *JobStatusApplyConfiguration `json:"status,omitempty"` -} - -// PaddleJob constructs a declarative configuration of the PaddleJob type for use with -// apply. -func PaddleJob(name, namespace string) *PaddleJobApplyConfiguration { - b := &PaddleJobApplyConfiguration{} - b.WithName(name) - b.WithNamespace(namespace) - b.WithKind("PaddleJob") - b.WithAPIVersion("kubeflow.org/v1") - return b -} - -// WithKind sets the Kind field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Kind field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithKind(value string) *PaddleJobApplyConfiguration { - b.Kind = &value - return b -} - -// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the APIVersion field is set to the value of the last call. 
-func (b *PaddleJobApplyConfiguration) WithAPIVersion(value string) *PaddleJobApplyConfiguration { - b.APIVersion = &value - return b -} - -// WithName sets the Name field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Name field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithName(value string) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Name = &value - return b -} - -// WithGenerateName sets the GenerateName field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the GenerateName field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithGenerateName(value string) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.GenerateName = &value - return b -} - -// WithNamespace sets the Namespace field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Namespace field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithNamespace(value string) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Namespace = &value - return b -} - -// WithUID sets the UID field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UID field is set to the value of the last call. 
-func (b *PaddleJobApplyConfiguration) WithUID(value types.UID) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.UID = &value - return b -} - -// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ResourceVersion field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithResourceVersion(value string) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.ResourceVersion = &value - return b -} - -// WithGeneration sets the Generation field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Generation field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithGeneration(value int64) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Generation = &value - return b -} - -// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CreationTimestamp field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithCreationTimestamp(value metav1.Time) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.CreationTimestamp = &value - return b -} - -// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionTimestamp field is set to the value of the last call. 
-func (b *PaddleJobApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionTimestamp = &value - return b -} - -// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionGracePeriodSeconds = &value - return b -} - -// WithLabels puts the entries into the Labels field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Labels field, -// overwriting an existing map entries in Labels field with the same key. -func (b *PaddleJobApplyConfiguration) WithLabels(entries map[string]string) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Labels == nil && len(entries) > 0 { - b.Labels = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Labels[k] = v - } - return b -} - -// WithAnnotations puts the entries into the Annotations field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Annotations field, -// overwriting an existing map entries in Annotations field with the same key. 
-func (b *PaddleJobApplyConfiguration) WithAnnotations(entries map[string]string) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Annotations == nil && len(entries) > 0 { - b.Annotations = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Annotations[k] = v - } - return b -} - -// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the OwnerReferences field. -func (b *PaddleJobApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - if values[i] == nil { - panic("nil value passed to WithOwnerReferences") - } - b.OwnerReferences = append(b.OwnerReferences, *values[i]) - } - return b -} - -// WithFinalizers adds the given value to the Finalizers field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the Finalizers field. -func (b *PaddleJobApplyConfiguration) WithFinalizers(values ...string) *PaddleJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - b.Finalizers = append(b.Finalizers, values[i]) - } - return b -} - -func (b *PaddleJobApplyConfiguration) ensureObjectMetaApplyConfigurationExists() { - if b.ObjectMetaApplyConfiguration == nil { - b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{} - } -} - -// WithSpec sets the Spec field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
-// If called multiple times, the Spec field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithSpec(value *PaddleJobSpecApplyConfiguration) *PaddleJobApplyConfiguration { - b.Spec = value - return b -} - -// WithStatus sets the Status field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Status field is set to the value of the last call. -func (b *PaddleJobApplyConfiguration) WithStatus(value *JobStatusApplyConfiguration) *PaddleJobApplyConfiguration { - b.Status = value - return b -} - -// GetName retrieves the value of the Name field in the declarative configuration. -func (b *PaddleJobApplyConfiguration) GetName() *string { - b.ensureObjectMetaApplyConfigurationExists() - return b.Name -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/paddlejobspec.go b/pkg/client/applyconfiguration/kubeflow.org/v1/paddlejobspec.go deleted file mode 100644 index a8f57fe346..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/paddlejobspec.go +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. 
- -package v1 - -import ( - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// PaddleJobSpecApplyConfiguration represents a declarative configuration of the PaddleJobSpec type for use -// with apply. -type PaddleJobSpecApplyConfiguration struct { - RunPolicy *RunPolicyApplyConfiguration `json:"runPolicy,omitempty"` - ElasticPolicy *PaddleElasticPolicyApplyConfiguration `json:"elasticPolicy,omitempty"` - PaddleReplicaSpecs map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec `json:"paddleReplicaSpecs,omitempty"` -} - -// PaddleJobSpecApplyConfiguration constructs a declarative configuration of the PaddleJobSpec type for use with -// apply. -func PaddleJobSpec() *PaddleJobSpecApplyConfiguration { - return &PaddleJobSpecApplyConfiguration{} -} - -// WithRunPolicy sets the RunPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RunPolicy field is set to the value of the last call. -func (b *PaddleJobSpecApplyConfiguration) WithRunPolicy(value *RunPolicyApplyConfiguration) *PaddleJobSpecApplyConfiguration { - b.RunPolicy = value - return b -} - -// WithElasticPolicy sets the ElasticPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ElasticPolicy field is set to the value of the last call. -func (b *PaddleJobSpecApplyConfiguration) WithElasticPolicy(value *PaddleElasticPolicyApplyConfiguration) *PaddleJobSpecApplyConfiguration { - b.ElasticPolicy = value - return b -} - -// WithPaddleReplicaSpecs puts the entries into the PaddleReplicaSpecs field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. 
-// If called multiple times, the entries provided by each call will be put on the PaddleReplicaSpecs field, -// overwriting an existing map entries in PaddleReplicaSpecs field with the same key. -func (b *PaddleJobSpecApplyConfiguration) WithPaddleReplicaSpecs(entries map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec) *PaddleJobSpecApplyConfiguration { - if b.PaddleReplicaSpecs == nil && len(entries) > 0 { - b.PaddleReplicaSpecs = make(map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec, len(entries)) - } - for k, v := range entries { - b.PaddleReplicaSpecs[k] = v - } - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/pytorchjob.go b/pkg/client/applyconfiguration/kubeflow.org/v1/pytorchjob.go deleted file mode 100644 index 6fb1336a49..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/pytorchjob.go +++ /dev/null @@ -1,223 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - v1 "k8s.io/client-go/applyconfigurations/meta/v1" -) - -// PyTorchJobApplyConfiguration represents a declarative configuration of the PyTorchJob type for use -// with apply. 
-type PyTorchJobApplyConfiguration struct { - v1.TypeMetaApplyConfiguration `json:",inline"` - *v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"` - Spec *PyTorchJobSpecApplyConfiguration `json:"spec,omitempty"` - Status *JobStatusApplyConfiguration `json:"status,omitempty"` -} - -// PyTorchJob constructs a declarative configuration of the PyTorchJob type for use with -// apply. -func PyTorchJob(name, namespace string) *PyTorchJobApplyConfiguration { - b := &PyTorchJobApplyConfiguration{} - b.WithName(name) - b.WithNamespace(namespace) - b.WithKind("PyTorchJob") - b.WithAPIVersion("kubeflow.org/v1") - return b -} - -// WithKind sets the Kind field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Kind field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithKind(value string) *PyTorchJobApplyConfiguration { - b.Kind = &value - return b -} - -// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the APIVersion field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithAPIVersion(value string) *PyTorchJobApplyConfiguration { - b.APIVersion = &value - return b -} - -// WithName sets the Name field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Name field is set to the value of the last call. 
-func (b *PyTorchJobApplyConfiguration) WithName(value string) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Name = &value - return b -} - -// WithGenerateName sets the GenerateName field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the GenerateName field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithGenerateName(value string) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.GenerateName = &value - return b -} - -// WithNamespace sets the Namespace field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Namespace field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithNamespace(value string) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Namespace = &value - return b -} - -// WithUID sets the UID field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UID field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithUID(value types.UID) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.UID = &value - return b -} - -// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ResourceVersion field is set to the value of the last call. 
-func (b *PyTorchJobApplyConfiguration) WithResourceVersion(value string) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.ResourceVersion = &value - return b -} - -// WithGeneration sets the Generation field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Generation field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithGeneration(value int64) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Generation = &value - return b -} - -// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CreationTimestamp field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithCreationTimestamp(value metav1.Time) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.CreationTimestamp = &value - return b -} - -// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionTimestamp field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionTimestamp = &value - return b -} - -// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
-// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionGracePeriodSeconds = &value - return b -} - -// WithLabels puts the entries into the Labels field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Labels field, -// overwriting an existing map entries in Labels field with the same key. -func (b *PyTorchJobApplyConfiguration) WithLabels(entries map[string]string) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Labels == nil && len(entries) > 0 { - b.Labels = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Labels[k] = v - } - return b -} - -// WithAnnotations puts the entries into the Annotations field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Annotations field, -// overwriting an existing map entries in Annotations field with the same key. -func (b *PyTorchJobApplyConfiguration) WithAnnotations(entries map[string]string) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Annotations == nil && len(entries) > 0 { - b.Annotations = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Annotations[k] = v - } - return b -} - -// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. 
-// If called multiple times, values provided by each call will be appended to the OwnerReferences field. -func (b *PyTorchJobApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - if values[i] == nil { - panic("nil value passed to WithOwnerReferences") - } - b.OwnerReferences = append(b.OwnerReferences, *values[i]) - } - return b -} - -// WithFinalizers adds the given value to the Finalizers field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the Finalizers field. -func (b *PyTorchJobApplyConfiguration) WithFinalizers(values ...string) *PyTorchJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - b.Finalizers = append(b.Finalizers, values[i]) - } - return b -} - -func (b *PyTorchJobApplyConfiguration) ensureObjectMetaApplyConfigurationExists() { - if b.ObjectMetaApplyConfiguration == nil { - b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{} - } -} - -// WithSpec sets the Spec field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Spec field is set to the value of the last call. -func (b *PyTorchJobApplyConfiguration) WithSpec(value *PyTorchJobSpecApplyConfiguration) *PyTorchJobApplyConfiguration { - b.Spec = value - return b -} - -// WithStatus sets the Status field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Status field is set to the value of the last call. 
-func (b *PyTorchJobApplyConfiguration) WithStatus(value *JobStatusApplyConfiguration) *PyTorchJobApplyConfiguration { - b.Status = value - return b -} - -// GetName retrieves the value of the Name field in the declarative configuration. -func (b *PyTorchJobApplyConfiguration) GetName() *string { - b.ensureObjectMetaApplyConfigurationExists() - return b.Name -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/pytorchjobspec.go b/pkg/client/applyconfiguration/kubeflow.org/v1/pytorchjobspec.go deleted file mode 100644 index 1a9ae0ceff..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/pytorchjobspec.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// PyTorchJobSpecApplyConfiguration represents a declarative configuration of the PyTorchJobSpec type for use -// with apply. 
-type PyTorchJobSpecApplyConfiguration struct { - RunPolicy *RunPolicyApplyConfiguration `json:"runPolicy,omitempty"` - ElasticPolicy *ElasticPolicyApplyConfiguration `json:"elasticPolicy,omitempty"` - PyTorchReplicaSpecs map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec `json:"pytorchReplicaSpecs,omitempty"` - NprocPerNode *string `json:"nprocPerNode,omitempty"` -} - -// PyTorchJobSpecApplyConfiguration constructs a declarative configuration of the PyTorchJobSpec type for use with -// apply. -func PyTorchJobSpec() *PyTorchJobSpecApplyConfiguration { - return &PyTorchJobSpecApplyConfiguration{} -} - -// WithRunPolicy sets the RunPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RunPolicy field is set to the value of the last call. -func (b *PyTorchJobSpecApplyConfiguration) WithRunPolicy(value *RunPolicyApplyConfiguration) *PyTorchJobSpecApplyConfiguration { - b.RunPolicy = value - return b -} - -// WithElasticPolicy sets the ElasticPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ElasticPolicy field is set to the value of the last call. -func (b *PyTorchJobSpecApplyConfiguration) WithElasticPolicy(value *ElasticPolicyApplyConfiguration) *PyTorchJobSpecApplyConfiguration { - b.ElasticPolicy = value - return b -} - -// WithPyTorchReplicaSpecs puts the entries into the PyTorchReplicaSpecs field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the PyTorchReplicaSpecs field, -// overwriting an existing map entries in PyTorchReplicaSpecs field with the same key. 
-func (b *PyTorchJobSpecApplyConfiguration) WithPyTorchReplicaSpecs(entries map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec) *PyTorchJobSpecApplyConfiguration { - if b.PyTorchReplicaSpecs == nil && len(entries) > 0 { - b.PyTorchReplicaSpecs = make(map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec, len(entries)) - } - for k, v := range entries { - b.PyTorchReplicaSpecs[k] = v - } - return b -} - -// WithNprocPerNode sets the NprocPerNode field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the NprocPerNode field is set to the value of the last call. -func (b *PyTorchJobSpecApplyConfiguration) WithNprocPerNode(value string) *PyTorchJobSpecApplyConfiguration { - b.NprocPerNode = &value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/rdzvconf.go b/pkg/client/applyconfiguration/kubeflow.org/v1/rdzvconf.go deleted file mode 100644 index 397a82477e..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/rdzvconf.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -// RDZVConfApplyConfiguration represents a declarative configuration of the RDZVConf type for use -// with apply. 
-type RDZVConfApplyConfiguration struct { - Key *string `json:"key,omitempty"` - Value *string `json:"value,omitempty"` -} - -// RDZVConfApplyConfiguration constructs a declarative configuration of the RDZVConf type for use with -// apply. -func RDZVConf() *RDZVConfApplyConfiguration { - return &RDZVConfApplyConfiguration{} -} - -// WithKey sets the Key field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Key field is set to the value of the last call. -func (b *RDZVConfApplyConfiguration) WithKey(value string) *RDZVConfApplyConfiguration { - b.Key = &value - return b -} - -// WithValue sets the Value field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Value field is set to the value of the last call. -func (b *RDZVConfApplyConfiguration) WithValue(value string) *RDZVConfApplyConfiguration { - b.Value = &value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/replicaspec.go b/pkg/client/applyconfiguration/kubeflow.org/v1/replicaspec.go deleted file mode 100644 index 49fa1ff32e..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/replicaspec.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - v1 "k8s.io/api/core/v1" -) - -// ReplicaSpecApplyConfiguration represents a declarative configuration of the ReplicaSpec type for use -// with apply. -type ReplicaSpecApplyConfiguration struct { - Replicas *int32 `json:"replicas,omitempty"` - Template *v1.PodTemplateSpec `json:"template,omitempty"` - RestartPolicy *kubefloworgv1.RestartPolicy `json:"restartPolicy,omitempty"` -} - -// ReplicaSpecApplyConfiguration constructs a declarative configuration of the ReplicaSpec type for use with -// apply. -func ReplicaSpec() *ReplicaSpecApplyConfiguration { - return &ReplicaSpecApplyConfiguration{} -} - -// WithReplicas sets the Replicas field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Replicas field is set to the value of the last call. -func (b *ReplicaSpecApplyConfiguration) WithReplicas(value int32) *ReplicaSpecApplyConfiguration { - b.Replicas = &value - return b -} - -// WithTemplate sets the Template field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Template field is set to the value of the last call. -func (b *ReplicaSpecApplyConfiguration) WithTemplate(value v1.PodTemplateSpec) *ReplicaSpecApplyConfiguration { - b.Template = &value - return b -} - -// WithRestartPolicy sets the RestartPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
-// If called multiple times, the RestartPolicy field is set to the value of the last call. -func (b *ReplicaSpecApplyConfiguration) WithRestartPolicy(value kubefloworgv1.RestartPolicy) *ReplicaSpecApplyConfiguration { - b.RestartPolicy = &value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/replicastatus.go b/pkg/client/applyconfiguration/kubeflow.org/v1/replicastatus.go deleted file mode 100644 index 1d34e4bd0a..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/replicastatus.go +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "k8s.io/client-go/applyconfigurations/meta/v1" -) - -// ReplicaStatusApplyConfiguration represents a declarative configuration of the ReplicaStatus type for use -// with apply. -type ReplicaStatusApplyConfiguration struct { - Active *int32 `json:"active,omitempty"` - Succeeded *int32 `json:"succeeded,omitempty"` - Failed *int32 `json:"failed,omitempty"` - LabelSelector *v1.LabelSelectorApplyConfiguration `json:"labelSelector,omitempty"` - Selector *string `json:"selector,omitempty"` -} - -// ReplicaStatusApplyConfiguration constructs a declarative configuration of the ReplicaStatus type for use with -// apply. 
-func ReplicaStatus() *ReplicaStatusApplyConfiguration { - return &ReplicaStatusApplyConfiguration{} -} - -// WithActive sets the Active field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Active field is set to the value of the last call. -func (b *ReplicaStatusApplyConfiguration) WithActive(value int32) *ReplicaStatusApplyConfiguration { - b.Active = &value - return b -} - -// WithSucceeded sets the Succeeded field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Succeeded field is set to the value of the last call. -func (b *ReplicaStatusApplyConfiguration) WithSucceeded(value int32) *ReplicaStatusApplyConfiguration { - b.Succeeded = &value - return b -} - -// WithFailed sets the Failed field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Failed field is set to the value of the last call. -func (b *ReplicaStatusApplyConfiguration) WithFailed(value int32) *ReplicaStatusApplyConfiguration { - b.Failed = &value - return b -} - -// WithLabelSelector sets the LabelSelector field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the LabelSelector field is set to the value of the last call. 
-func (b *ReplicaStatusApplyConfiguration) WithLabelSelector(value *v1.LabelSelectorApplyConfiguration) *ReplicaStatusApplyConfiguration { - b.LabelSelector = value - return b -} - -// WithSelector sets the Selector field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Selector field is set to the value of the last call. -func (b *ReplicaStatusApplyConfiguration) WithSelector(value string) *ReplicaStatusApplyConfiguration { - b.Selector = &value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/runpolicy.go b/pkg/client/applyconfiguration/kubeflow.org/v1/runpolicy.go deleted file mode 100644 index 76f5299c36..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/runpolicy.go +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// RunPolicyApplyConfiguration represents a declarative configuration of the RunPolicy type for use -// with apply. 
-type RunPolicyApplyConfiguration struct { - CleanPodPolicy *v1.CleanPodPolicy `json:"cleanPodPolicy,omitempty"` - TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"` - ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"` - BackoffLimit *int32 `json:"backoffLimit,omitempty"` - SchedulingPolicy *SchedulingPolicyApplyConfiguration `json:"schedulingPolicy,omitempty"` - Suspend *bool `json:"suspend,omitempty"` - ManagedBy *string `json:"managedBy,omitempty"` -} - -// RunPolicyApplyConfiguration constructs a declarative configuration of the RunPolicy type for use with -// apply. -func RunPolicy() *RunPolicyApplyConfiguration { - return &RunPolicyApplyConfiguration{} -} - -// WithCleanPodPolicy sets the CleanPodPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CleanPodPolicy field is set to the value of the last call. -func (b *RunPolicyApplyConfiguration) WithCleanPodPolicy(value v1.CleanPodPolicy) *RunPolicyApplyConfiguration { - b.CleanPodPolicy = &value - return b -} - -// WithTTLSecondsAfterFinished sets the TTLSecondsAfterFinished field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the TTLSecondsAfterFinished field is set to the value of the last call. -func (b *RunPolicyApplyConfiguration) WithTTLSecondsAfterFinished(value int32) *RunPolicyApplyConfiguration { - b.TTLSecondsAfterFinished = &value - return b -} - -// WithActiveDeadlineSeconds sets the ActiveDeadlineSeconds field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ActiveDeadlineSeconds field is set to the value of the last call. 
-func (b *RunPolicyApplyConfiguration) WithActiveDeadlineSeconds(value int64) *RunPolicyApplyConfiguration { - b.ActiveDeadlineSeconds = &value - return b -} - -// WithBackoffLimit sets the BackoffLimit field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the BackoffLimit field is set to the value of the last call. -func (b *RunPolicyApplyConfiguration) WithBackoffLimit(value int32) *RunPolicyApplyConfiguration { - b.BackoffLimit = &value - return b -} - -// WithSchedulingPolicy sets the SchedulingPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the SchedulingPolicy field is set to the value of the last call. -func (b *RunPolicyApplyConfiguration) WithSchedulingPolicy(value *SchedulingPolicyApplyConfiguration) *RunPolicyApplyConfiguration { - b.SchedulingPolicy = value - return b -} - -// WithSuspend sets the Suspend field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Suspend field is set to the value of the last call. -func (b *RunPolicyApplyConfiguration) WithSuspend(value bool) *RunPolicyApplyConfiguration { - b.Suspend = &value - return b -} - -// WithManagedBy sets the ManagedBy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ManagedBy field is set to the value of the last call. 
-func (b *RunPolicyApplyConfiguration) WithManagedBy(value string) *RunPolicyApplyConfiguration { - b.ManagedBy = &value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/schedulingpolicy.go b/pkg/client/applyconfiguration/kubeflow.org/v1/schedulingpolicy.go deleted file mode 100644 index c66cc436f0..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/schedulingpolicy.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "k8s.io/api/core/v1" - resource "k8s.io/apimachinery/pkg/api/resource" -) - -// SchedulingPolicyApplyConfiguration represents a declarative configuration of the SchedulingPolicy type for use -// with apply. -type SchedulingPolicyApplyConfiguration struct { - MinAvailable *int32 `json:"minAvailable,omitempty"` - Queue *string `json:"queue,omitempty"` - MinResources *map[v1.ResourceName]resource.Quantity `json:"minResources,omitempty"` - PriorityClass *string `json:"priorityClass,omitempty"` - ScheduleTimeoutSeconds *int32 `json:"scheduleTimeoutSeconds,omitempty"` -} - -// SchedulingPolicyApplyConfiguration constructs a declarative configuration of the SchedulingPolicy type for use with -// apply. 
-func SchedulingPolicy() *SchedulingPolicyApplyConfiguration { - return &SchedulingPolicyApplyConfiguration{} -} - -// WithMinAvailable sets the MinAvailable field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MinAvailable field is set to the value of the last call. -func (b *SchedulingPolicyApplyConfiguration) WithMinAvailable(value int32) *SchedulingPolicyApplyConfiguration { - b.MinAvailable = &value - return b -} - -// WithQueue sets the Queue field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Queue field is set to the value of the last call. -func (b *SchedulingPolicyApplyConfiguration) WithQueue(value string) *SchedulingPolicyApplyConfiguration { - b.Queue = &value - return b -} - -// WithMinResources sets the MinResources field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MinResources field is set to the value of the last call. -func (b *SchedulingPolicyApplyConfiguration) WithMinResources(value map[v1.ResourceName]resource.Quantity) *SchedulingPolicyApplyConfiguration { - b.MinResources = &value - return b -} - -// WithPriorityClass sets the PriorityClass field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the PriorityClass field is set to the value of the last call. 
-func (b *SchedulingPolicyApplyConfiguration) WithPriorityClass(value string) *SchedulingPolicyApplyConfiguration { - b.PriorityClass = &value - return b -} - -// WithScheduleTimeoutSeconds sets the ScheduleTimeoutSeconds field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ScheduleTimeoutSeconds field is set to the value of the last call. -func (b *SchedulingPolicyApplyConfiguration) WithScheduleTimeoutSeconds(value int32) *SchedulingPolicyApplyConfiguration { - b.ScheduleTimeoutSeconds = &value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/tfjob.go b/pkg/client/applyconfiguration/kubeflow.org/v1/tfjob.go deleted file mode 100644 index 61e08dd5f5..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/tfjob.go +++ /dev/null @@ -1,223 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - v1 "k8s.io/client-go/applyconfigurations/meta/v1" -) - -// TFJobApplyConfiguration represents a declarative configuration of the TFJob type for use -// with apply. 
-type TFJobApplyConfiguration struct { - v1.TypeMetaApplyConfiguration `json:",inline"` - *v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"` - Spec *TFJobSpecApplyConfiguration `json:"spec,omitempty"` - Status *JobStatusApplyConfiguration `json:"status,omitempty"` -} - -// TFJob constructs a declarative configuration of the TFJob type for use with -// apply. -func TFJob(name, namespace string) *TFJobApplyConfiguration { - b := &TFJobApplyConfiguration{} - b.WithName(name) - b.WithNamespace(namespace) - b.WithKind("TFJob") - b.WithAPIVersion("kubeflow.org/v1") - return b -} - -// WithKind sets the Kind field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Kind field is set to the value of the last call. -func (b *TFJobApplyConfiguration) WithKind(value string) *TFJobApplyConfiguration { - b.Kind = &value - return b -} - -// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the APIVersion field is set to the value of the last call. -func (b *TFJobApplyConfiguration) WithAPIVersion(value string) *TFJobApplyConfiguration { - b.APIVersion = &value - return b -} - -// WithName sets the Name field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Name field is set to the value of the last call. 
-func (b *TFJobApplyConfiguration) WithName(value string) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Name = &value - return b -} - -// WithGenerateName sets the GenerateName field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the GenerateName field is set to the value of the last call. -func (b *TFJobApplyConfiguration) WithGenerateName(value string) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.GenerateName = &value - return b -} - -// WithNamespace sets the Namespace field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Namespace field is set to the value of the last call. -func (b *TFJobApplyConfiguration) WithNamespace(value string) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Namespace = &value - return b -} - -// WithUID sets the UID field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UID field is set to the value of the last call. -func (b *TFJobApplyConfiguration) WithUID(value types.UID) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.UID = &value - return b -} - -// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ResourceVersion field is set to the value of the last call. 
-func (b *TFJobApplyConfiguration) WithResourceVersion(value string) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.ResourceVersion = &value - return b -} - -// WithGeneration sets the Generation field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Generation field is set to the value of the last call. -func (b *TFJobApplyConfiguration) WithGeneration(value int64) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Generation = &value - return b -} - -// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CreationTimestamp field is set to the value of the last call. -func (b *TFJobApplyConfiguration) WithCreationTimestamp(value metav1.Time) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.CreationTimestamp = &value - return b -} - -// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionTimestamp field is set to the value of the last call. -func (b *TFJobApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionTimestamp = &value - return b -} - -// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. 
-func (b *TFJobApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionGracePeriodSeconds = &value - return b -} - -// WithLabels puts the entries into the Labels field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Labels field, -// overwriting an existing map entries in Labels field with the same key. -func (b *TFJobApplyConfiguration) WithLabels(entries map[string]string) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Labels == nil && len(entries) > 0 { - b.Labels = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Labels[k] = v - } - return b -} - -// WithAnnotations puts the entries into the Annotations field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Annotations field, -// overwriting an existing map entries in Annotations field with the same key. -func (b *TFJobApplyConfiguration) WithAnnotations(entries map[string]string) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Annotations == nil && len(entries) > 0 { - b.Annotations = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Annotations[k] = v - } - return b -} - -// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the OwnerReferences field. 
-func (b *TFJobApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - if values[i] == nil { - panic("nil value passed to WithOwnerReferences") - } - b.OwnerReferences = append(b.OwnerReferences, *values[i]) - } - return b -} - -// WithFinalizers adds the given value to the Finalizers field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the Finalizers field. -func (b *TFJobApplyConfiguration) WithFinalizers(values ...string) *TFJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - b.Finalizers = append(b.Finalizers, values[i]) - } - return b -} - -func (b *TFJobApplyConfiguration) ensureObjectMetaApplyConfigurationExists() { - if b.ObjectMetaApplyConfiguration == nil { - b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{} - } -} - -// WithSpec sets the Spec field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Spec field is set to the value of the last call. -func (b *TFJobApplyConfiguration) WithSpec(value *TFJobSpecApplyConfiguration) *TFJobApplyConfiguration { - b.Spec = value - return b -} - -// WithStatus sets the Status field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Status field is set to the value of the last call. 
-func (b *TFJobApplyConfiguration) WithStatus(value *JobStatusApplyConfiguration) *TFJobApplyConfiguration { - b.Status = value - return b -} - -// GetName retrieves the value of the Name field in the declarative configuration. -func (b *TFJobApplyConfiguration) GetName() *string { - b.ensureObjectMetaApplyConfigurationExists() - return b.Name -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/tfjobspec.go b/pkg/client/applyconfiguration/kubeflow.org/v1/tfjobspec.go deleted file mode 100644 index defbd9c7fc..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/tfjobspec.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// TFJobSpecApplyConfiguration represents a declarative configuration of the TFJobSpec type for use -// with apply. 
-type TFJobSpecApplyConfiguration struct { - RunPolicy *RunPolicyApplyConfiguration `json:"runPolicy,omitempty"` - SuccessPolicy *kubefloworgv1.SuccessPolicy `json:"successPolicy,omitempty"` - TFReplicaSpecs map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec `json:"tfReplicaSpecs,omitempty"` - EnableDynamicWorker *bool `json:"enableDynamicWorker,omitempty"` -} - -// TFJobSpecApplyConfiguration constructs a declarative configuration of the TFJobSpec type for use with -// apply. -func TFJobSpec() *TFJobSpecApplyConfiguration { - return &TFJobSpecApplyConfiguration{} -} - -// WithRunPolicy sets the RunPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RunPolicy field is set to the value of the last call. -func (b *TFJobSpecApplyConfiguration) WithRunPolicy(value *RunPolicyApplyConfiguration) *TFJobSpecApplyConfiguration { - b.RunPolicy = value - return b -} - -// WithSuccessPolicy sets the SuccessPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the SuccessPolicy field is set to the value of the last call. -func (b *TFJobSpecApplyConfiguration) WithSuccessPolicy(value kubefloworgv1.SuccessPolicy) *TFJobSpecApplyConfiguration { - b.SuccessPolicy = &value - return b -} - -// WithTFReplicaSpecs puts the entries into the TFReplicaSpecs field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the TFReplicaSpecs field, -// overwriting an existing map entries in TFReplicaSpecs field with the same key. 
-func (b *TFJobSpecApplyConfiguration) WithTFReplicaSpecs(entries map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec) *TFJobSpecApplyConfiguration { - if b.TFReplicaSpecs == nil && len(entries) > 0 { - b.TFReplicaSpecs = make(map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec, len(entries)) - } - for k, v := range entries { - b.TFReplicaSpecs[k] = v - } - return b -} - -// WithEnableDynamicWorker sets the EnableDynamicWorker field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the EnableDynamicWorker field is set to the value of the last call. -func (b *TFJobSpecApplyConfiguration) WithEnableDynamicWorker(value bool) *TFJobSpecApplyConfiguration { - b.EnableDynamicWorker = &value - return b -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/xgboostjob.go b/pkg/client/applyconfiguration/kubeflow.org/v1/xgboostjob.go deleted file mode 100644 index 7c27f76105..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/xgboostjob.go +++ /dev/null @@ -1,223 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. 
- -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - v1 "k8s.io/client-go/applyconfigurations/meta/v1" -) - -// XGBoostJobApplyConfiguration represents a declarative configuration of the XGBoostJob type for use -// with apply. -type XGBoostJobApplyConfiguration struct { - v1.TypeMetaApplyConfiguration `json:",inline"` - *v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"` - Spec *XGBoostJobSpecApplyConfiguration `json:"spec,omitempty"` - Status *JobStatusApplyConfiguration `json:"status,omitempty"` -} - -// XGBoostJob constructs a declarative configuration of the XGBoostJob type for use with -// apply. -func XGBoostJob(name, namespace string) *XGBoostJobApplyConfiguration { - b := &XGBoostJobApplyConfiguration{} - b.WithName(name) - b.WithNamespace(namespace) - b.WithKind("XGBoostJob") - b.WithAPIVersion("kubeflow.org/v1") - return b -} - -// WithKind sets the Kind field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Kind field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithKind(value string) *XGBoostJobApplyConfiguration { - b.Kind = &value - return b -} - -// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the APIVersion field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithAPIVersion(value string) *XGBoostJobApplyConfiguration { - b.APIVersion = &value - return b -} - -// WithName sets the Name field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
-// If called multiple times, the Name field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithName(value string) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Name = &value - return b -} - -// WithGenerateName sets the GenerateName field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the GenerateName field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithGenerateName(value string) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.GenerateName = &value - return b -} - -// WithNamespace sets the Namespace field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Namespace field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithNamespace(value string) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Namespace = &value - return b -} - -// WithUID sets the UID field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UID field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithUID(value types.UID) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.UID = &value - return b -} - -// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ResourceVersion field is set to the value of the last call. 
-func (b *XGBoostJobApplyConfiguration) WithResourceVersion(value string) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.ResourceVersion = &value - return b -} - -// WithGeneration sets the Generation field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Generation field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithGeneration(value int64) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.Generation = &value - return b -} - -// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the CreationTimestamp field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithCreationTimestamp(value metav1.Time) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.CreationTimestamp = &value - return b -} - -// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DeletionTimestamp field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionTimestamp = &value - return b -} - -// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
-// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - b.DeletionGracePeriodSeconds = &value - return b -} - -// WithLabels puts the entries into the Labels field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Labels field, -// overwriting an existing map entries in Labels field with the same key. -func (b *XGBoostJobApplyConfiguration) WithLabels(entries map[string]string) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Labels == nil && len(entries) > 0 { - b.Labels = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Labels[k] = v - } - return b -} - -// WithAnnotations puts the entries into the Annotations field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Annotations field, -// overwriting an existing map entries in Annotations field with the same key. -func (b *XGBoostJobApplyConfiguration) WithAnnotations(entries map[string]string) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - if b.Annotations == nil && len(entries) > 0 { - b.Annotations = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.Annotations[k] = v - } - return b -} - -// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. 
-// If called multiple times, values provided by each call will be appended to the OwnerReferences field. -func (b *XGBoostJobApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - if values[i] == nil { - panic("nil value passed to WithOwnerReferences") - } - b.OwnerReferences = append(b.OwnerReferences, *values[i]) - } - return b -} - -// WithFinalizers adds the given value to the Finalizers field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the Finalizers field. -func (b *XGBoostJobApplyConfiguration) WithFinalizers(values ...string) *XGBoostJobApplyConfiguration { - b.ensureObjectMetaApplyConfigurationExists() - for i := range values { - b.Finalizers = append(b.Finalizers, values[i]) - } - return b -} - -func (b *XGBoostJobApplyConfiguration) ensureObjectMetaApplyConfigurationExists() { - if b.ObjectMetaApplyConfiguration == nil { - b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{} - } -} - -// WithSpec sets the Spec field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Spec field is set to the value of the last call. -func (b *XGBoostJobApplyConfiguration) WithSpec(value *XGBoostJobSpecApplyConfiguration) *XGBoostJobApplyConfiguration { - b.Spec = value - return b -} - -// WithStatus sets the Status field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Status field is set to the value of the last call. 
-func (b *XGBoostJobApplyConfiguration) WithStatus(value *JobStatusApplyConfiguration) *XGBoostJobApplyConfiguration { - b.Status = value - return b -} - -// GetName retrieves the value of the Name field in the declarative configuration. -func (b *XGBoostJobApplyConfiguration) GetName() *string { - b.ensureObjectMetaApplyConfigurationExists() - return b.Name -} diff --git a/pkg/client/applyconfiguration/kubeflow.org/v1/xgboostjobspec.go b/pkg/client/applyconfiguration/kubeflow.org/v1/xgboostjobspec.go deleted file mode 100644 index 77f81c4da7..0000000000 --- a/pkg/client/applyconfiguration/kubeflow.org/v1/xgboostjobspec.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// XGBoostJobSpecApplyConfiguration represents a declarative configuration of the XGBoostJobSpec type for use -// with apply. -type XGBoostJobSpecApplyConfiguration struct { - RunPolicy *RunPolicyApplyConfiguration `json:"runPolicy,omitempty"` - XGBReplicaSpecs map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec `json:"xgbReplicaSpecs,omitempty"` -} - -// XGBoostJobSpecApplyConfiguration constructs a declarative configuration of the XGBoostJobSpec type for use with -// apply. 
-func XGBoostJobSpec() *XGBoostJobSpecApplyConfiguration { - return &XGBoostJobSpecApplyConfiguration{} -} - -// WithRunPolicy sets the RunPolicy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RunPolicy field is set to the value of the last call. -func (b *XGBoostJobSpecApplyConfiguration) WithRunPolicy(value *RunPolicyApplyConfiguration) *XGBoostJobSpecApplyConfiguration { - b.RunPolicy = value - return b -} - -// WithXGBReplicaSpecs puts the entries into the XGBReplicaSpecs field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the XGBReplicaSpecs field, -// overwriting an existing map entries in XGBReplicaSpecs field with the same key. -func (b *XGBoostJobSpecApplyConfiguration) WithXGBReplicaSpecs(entries map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec) *XGBoostJobSpecApplyConfiguration { - if b.XGBReplicaSpecs == nil && len(entries) > 0 { - b.XGBReplicaSpecs = make(map[kubefloworgv1.ReplicaType]*kubefloworgv1.ReplicaSpec, len(entries)) - } - for k, v := range entries { - b.XGBReplicaSpecs[k] = v - } - return b -} diff --git a/pkg/client/applyconfiguration/utils.go b/pkg/client/applyconfiguration/utils.go index 19f5d0d69c..7b3cdc7105 100644 --- a/pkg/client/applyconfiguration/utils.go +++ b/pkg/client/applyconfiguration/utils.go @@ -17,10 +17,8 @@ package applyconfiguration import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" v2alpha1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" internal "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/internal" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" kubefloworgv2alpha1 
"github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v2alpha1" runtime "k8s.io/apimachinery/pkg/runtime" schema "k8s.io/apimachinery/pkg/runtime/schema" @@ -31,51 +29,7 @@ import ( // apply configuration type exists for the given GroupVersionKind. func ForKind(kind schema.GroupVersionKind) interface{} { switch kind { - // Group=kubeflow.org, Version=v1 - case v1.SchemeGroupVersion.WithKind("ElasticPolicy"): - return &kubefloworgv1.ElasticPolicyApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("JAXJob"): - return &kubefloworgv1.JAXJobApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("JAXJobSpec"): - return &kubefloworgv1.JAXJobSpecApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("JobCondition"): - return &kubefloworgv1.JobConditionApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("JobStatus"): - return &kubefloworgv1.JobStatusApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("MPIJob"): - return &kubefloworgv1.MPIJobApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("MPIJobSpec"): - return &kubefloworgv1.MPIJobSpecApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("PaddleElasticPolicy"): - return &kubefloworgv1.PaddleElasticPolicyApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("PaddleJob"): - return &kubefloworgv1.PaddleJobApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("PaddleJobSpec"): - return &kubefloworgv1.PaddleJobSpecApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("PyTorchJob"): - return &kubefloworgv1.PyTorchJobApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("PyTorchJobSpec"): - return &kubefloworgv1.PyTorchJobSpecApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("RDZVConf"): - return &kubefloworgv1.RDZVConfApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("ReplicaSpec"): - return &kubefloworgv1.ReplicaSpecApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("ReplicaStatus"): - return 
&kubefloworgv1.ReplicaStatusApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("RunPolicy"): - return &kubefloworgv1.RunPolicyApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("SchedulingPolicy"): - return &kubefloworgv1.SchedulingPolicyApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("TFJob"): - return &kubefloworgv1.TFJobApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("TFJobSpec"): - return &kubefloworgv1.TFJobSpecApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("XGBoostJob"): - return &kubefloworgv1.XGBoostJobApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("XGBoostJobSpec"): - return &kubefloworgv1.XGBoostJobSpecApplyConfiguration{} - - // Group=kubeflow.org, Version=v2alpha1 + // Group=kubeflow.org, Version=v2alpha1 case v2alpha1.SchemeGroupVersion.WithKind("ClusterTrainingRuntime"): return &kubefloworgv2alpha1.ClusterTrainingRuntimeApplyConfiguration{} case v2alpha1.SchemeGroupVersion.WithKind("ContainerOverride"): diff --git a/pkg/client/clientset/versioned/clientset.go b/pkg/client/clientset/versioned/clientset.go index 891d9508a8..a854037d06 100644 --- a/pkg/client/clientset/versioned/clientset.go +++ b/pkg/client/clientset/versioned/clientset.go @@ -20,7 +20,6 @@ import ( "fmt" "net/http" - kubeflowv1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v1" kubeflowv2alpha1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v2alpha1" discovery "k8s.io/client-go/discovery" rest "k8s.io/client-go/rest" @@ -29,22 +28,15 @@ import ( type Interface interface { Discovery() discovery.DiscoveryInterface - KubeflowV1() kubeflowv1.KubeflowV1Interface KubeflowV2alpha1() kubeflowv2alpha1.KubeflowV2alpha1Interface } // Clientset contains the clients for groups. 
type Clientset struct { *discovery.DiscoveryClient - kubeflowV1 *kubeflowv1.KubeflowV1Client kubeflowV2alpha1 *kubeflowv2alpha1.KubeflowV2alpha1Client } -// KubeflowV1 retrieves the KubeflowV1Client -func (c *Clientset) KubeflowV1() kubeflowv1.KubeflowV1Interface { - return c.kubeflowV1 -} - // KubeflowV2alpha1 retrieves the KubeflowV2alpha1Client func (c *Clientset) KubeflowV2alpha1() kubeflowv2alpha1.KubeflowV2alpha1Interface { return c.kubeflowV2alpha1 @@ -94,10 +86,6 @@ func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset, var cs Clientset var err error - cs.kubeflowV1, err = kubeflowv1.NewForConfigAndClient(&configShallowCopy, httpClient) - if err != nil { - return nil, err - } cs.kubeflowV2alpha1, err = kubeflowv2alpha1.NewForConfigAndClient(&configShallowCopy, httpClient) if err != nil { return nil, err @@ -123,7 +111,6 @@ func NewForConfigOrDie(c *rest.Config) *Clientset { // New creates a new Clientset for the given RESTClient. func New(c rest.Interface) *Clientset { var cs Clientset - cs.kubeflowV1 = kubeflowv1.New(c) cs.kubeflowV2alpha1 = kubeflowv2alpha1.New(c) cs.DiscoveryClient = discovery.NewDiscoveryClient(c) diff --git a/pkg/client/clientset/versioned/fake/clientset_generated.go b/pkg/client/clientset/versioned/fake/clientset_generated.go index 3bb8feb7e8..c6a14fb1cb 100644 --- a/pkg/client/clientset/versioned/fake/clientset_generated.go +++ b/pkg/client/clientset/versioned/fake/clientset_generated.go @@ -19,8 +19,6 @@ package fake import ( applyconfiguration "github.com/kubeflow/training-operator/pkg/client/applyconfiguration" clientset "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" - kubeflowv1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v1" - fakekubeflowv1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake" kubeflowv2alpha1 
"github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v2alpha1" fakekubeflowv2alpha1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v2alpha1/fake" "k8s.io/apimachinery/pkg/runtime" @@ -116,11 +114,6 @@ var ( _ testing.FakeClient = &Clientset{} ) -// KubeflowV1 retrieves the KubeflowV1Client -func (c *Clientset) KubeflowV1() kubeflowv1.KubeflowV1Interface { - return &fakekubeflowv1.FakeKubeflowV1{Fake: &c.Fake} -} - // KubeflowV2alpha1 retrieves the KubeflowV2alpha1Client func (c *Clientset) KubeflowV2alpha1() kubeflowv2alpha1.KubeflowV2alpha1Interface { return &fakekubeflowv2alpha1.FakeKubeflowV2alpha1{Fake: &c.Fake} diff --git a/pkg/client/clientset/versioned/fake/register.go b/pkg/client/clientset/versioned/fake/register.go index 4a1bb11117..854bed50c1 100644 --- a/pkg/client/clientset/versioned/fake/register.go +++ b/pkg/client/clientset/versioned/fake/register.go @@ -17,7 +17,6 @@ package fake import ( - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" kubeflowv2alpha1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" @@ -30,7 +29,6 @@ var scheme = runtime.NewScheme() var codecs = serializer.NewCodecFactory(scheme) var localSchemeBuilder = runtime.SchemeBuilder{ - kubeflowv1.AddToScheme, kubeflowv2alpha1.AddToScheme, } diff --git a/pkg/client/clientset/versioned/scheme/register.go b/pkg/client/clientset/versioned/scheme/register.go index efc66b1aaa..9833636a9c 100644 --- a/pkg/client/clientset/versioned/scheme/register.go +++ b/pkg/client/clientset/versioned/scheme/register.go @@ -17,7 +17,6 @@ package scheme import ( - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" kubeflowv2alpha1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime 
"k8s.io/apimachinery/pkg/runtime" @@ -30,7 +29,6 @@ var Scheme = runtime.NewScheme() var Codecs = serializer.NewCodecFactory(Scheme) var ParameterCodec = runtime.NewParameterCodec(Scheme) var localSchemeBuilder = runtime.SchemeBuilder{ - kubeflowv1.AddToScheme, kubeflowv2alpha1.AddToScheme, } diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/doc.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/doc.go deleted file mode 100644 index a40f7e875b..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/doc.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -// This package has the automatically generated typed clients. -package v1 diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/doc.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/doc.go deleted file mode 100644 index 424308ee3b..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/doc.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -// Package fake has the automatically generated clients. -package fake diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_jaxjob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_jaxjob.go deleted file mode 100644 index 988e5b9956..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_jaxjob.go +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. 
- -package fake - -import ( - "context" - json "encoding/json" - "fmt" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" -) - -// FakeJAXJobs implements JAXJobInterface -type FakeJAXJobs struct { - Fake *FakeKubeflowV1 - ns string -} - -var jaxjobsResource = v1.SchemeGroupVersion.WithResource("jaxjobs") - -var jaxjobsKind = v1.SchemeGroupVersion.WithKind("JAXJob") - -// Get takes name of the jAXJob, and returns the corresponding jAXJob object, and an error if there is any. -func (c *FakeJAXJobs) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.JAXJob, err error) { - emptyResult := &v1.JAXJob{} - obj, err := c.Fake. - Invokes(testing.NewGetActionWithOptions(jaxjobsResource, c.ns, name, options), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.JAXJob), err -} - -// List takes label and field selectors, and returns the list of JAXJobs that match those selectors. -func (c *FakeJAXJobs) List(ctx context.Context, opts metav1.ListOptions) (result *v1.JAXJobList, err error) { - emptyResult := &v1.JAXJobList{} - obj, err := c.Fake. - Invokes(testing.NewListActionWithOptions(jaxjobsResource, jaxjobsKind, c.ns, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1.JAXJobList{ListMeta: obj.(*v1.JAXJobList).ListMeta} - for _, item := range obj.(*v1.JAXJobList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested jAXJobs. 
-func (c *FakeJAXJobs) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchActionWithOptions(jaxjobsResource, c.ns, opts)) - -} - -// Create takes the representation of a jAXJob and creates it. Returns the server's representation of the jAXJob, and an error, if there is any. -func (c *FakeJAXJobs) Create(ctx context.Context, jAXJob *v1.JAXJob, opts metav1.CreateOptions) (result *v1.JAXJob, err error) { - emptyResult := &v1.JAXJob{} - obj, err := c.Fake. - Invokes(testing.NewCreateActionWithOptions(jaxjobsResource, c.ns, jAXJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.JAXJob), err -} - -// Update takes the representation of a jAXJob and updates it. Returns the server's representation of the jAXJob, and an error, if there is any. -func (c *FakeJAXJobs) Update(ctx context.Context, jAXJob *v1.JAXJob, opts metav1.UpdateOptions) (result *v1.JAXJob, err error) { - emptyResult := &v1.JAXJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateActionWithOptions(jaxjobsResource, c.ns, jAXJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.JAXJob), err -} - -// UpdateStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *FakeJAXJobs) UpdateStatus(ctx context.Context, jAXJob *v1.JAXJob, opts metav1.UpdateOptions) (result *v1.JAXJob, err error) { - emptyResult := &v1.JAXJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceActionWithOptions(jaxjobsResource, "status", c.ns, jAXJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.JAXJob), err -} - -// Delete takes name of the jAXJob and deletes it. Returns an error if one occurs. -func (c *FakeJAXJobs) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { - _, err := c.Fake. 
- Invokes(testing.NewDeleteActionWithOptions(jaxjobsResource, c.ns, name, opts), &v1.JAXJob{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakeJAXJobs) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { - action := testing.NewDeleteCollectionActionWithOptions(jaxjobsResource, c.ns, opts, listOpts) - - _, err := c.Fake.Invokes(action, &v1.JAXJobList{}) - return err -} - -// Patch applies the patch and returns the patched jAXJob. -func (c *FakeJAXJobs) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.JAXJob, err error) { - emptyResult := &v1.JAXJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(jaxjobsResource, c.ns, name, pt, data, opts, subresources...), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.JAXJob), err -} - -// Apply takes the given apply declarative configuration, applies it and returns the applied jAXJob. -func (c *FakeJAXJobs) Apply(ctx context.Context, jAXJob *kubefloworgv1.JAXJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.JAXJob, err error) { - if jAXJob == nil { - return nil, fmt.Errorf("jAXJob provided to Apply must not be nil") - } - data, err := json.Marshal(jAXJob) - if err != nil { - return nil, err - } - name := jAXJob.Name - if name == nil { - return nil, fmt.Errorf("jAXJob.Name must be provided to Apply") - } - emptyResult := &v1.JAXJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(jaxjobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.JAXJob), err -} - -// ApplyStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). 
-func (c *FakeJAXJobs) ApplyStatus(ctx context.Context, jAXJob *kubefloworgv1.JAXJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.JAXJob, err error) { - if jAXJob == nil { - return nil, fmt.Errorf("jAXJob provided to Apply must not be nil") - } - data, err := json.Marshal(jAXJob) - if err != nil { - return nil, err - } - name := jAXJob.Name - if name == nil { - return nil, fmt.Errorf("jAXJob.Name must be provided to Apply") - } - emptyResult := &v1.JAXJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(jaxjobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.JAXJob), err -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_kubeflow.org_client.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_kubeflow.org_client.go deleted file mode 100644 index 9b6a64203f..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_kubeflow.org_client.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. 
- -package fake - -import ( - v1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v1" - rest "k8s.io/client-go/rest" - testing "k8s.io/client-go/testing" -) - -type FakeKubeflowV1 struct { - *testing.Fake -} - -func (c *FakeKubeflowV1) JAXJobs(namespace string) v1.JAXJobInterface { - return &FakeJAXJobs{c, namespace} -} - -func (c *FakeKubeflowV1) MPIJobs(namespace string) v1.MPIJobInterface { - return &FakeMPIJobs{c, namespace} -} - -func (c *FakeKubeflowV1) PaddleJobs(namespace string) v1.PaddleJobInterface { - return &FakePaddleJobs{c, namespace} -} - -func (c *FakeKubeflowV1) PyTorchJobs(namespace string) v1.PyTorchJobInterface { - return &FakePyTorchJobs{c, namespace} -} - -func (c *FakeKubeflowV1) TFJobs(namespace string) v1.TFJobInterface { - return &FakeTFJobs{c, namespace} -} - -func (c *FakeKubeflowV1) XGBoostJobs(namespace string) v1.XGBoostJobInterface { - return &FakeXGBoostJobs{c, namespace} -} - -// RESTClient returns a RESTClient that is used to communicate -// with API server by this client implementation. -func (c *FakeKubeflowV1) RESTClient() rest.Interface { - var ret *rest.RESTClient - return ret -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_mpijob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_mpijob.go deleted file mode 100644 index 0a1d1e9b9e..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_mpijob.go +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package fake - -import ( - "context" - json "encoding/json" - "fmt" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" -) - -// FakeMPIJobs implements MPIJobInterface -type FakeMPIJobs struct { - Fake *FakeKubeflowV1 - ns string -} - -var mpijobsResource = v1.SchemeGroupVersion.WithResource("mpijobs") - -var mpijobsKind = v1.SchemeGroupVersion.WithKind("MPIJob") - -// Get takes name of the mPIJob, and returns the corresponding mPIJob object, and an error if there is any. -func (c *FakeMPIJobs) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.MPIJob, err error) { - emptyResult := &v1.MPIJob{} - obj, err := c.Fake. - Invokes(testing.NewGetActionWithOptions(mpijobsResource, c.ns, name, options), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.MPIJob), err -} - -// List takes label and field selectors, and returns the list of MPIJobs that match those selectors. -func (c *FakeMPIJobs) List(ctx context.Context, opts metav1.ListOptions) (result *v1.MPIJobList, err error) { - emptyResult := &v1.MPIJobList{} - obj, err := c.Fake. 
- Invokes(testing.NewListActionWithOptions(mpijobsResource, mpijobsKind, c.ns, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1.MPIJobList{ListMeta: obj.(*v1.MPIJobList).ListMeta} - for _, item := range obj.(*v1.MPIJobList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested mPIJobs. -func (c *FakeMPIJobs) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchActionWithOptions(mpijobsResource, c.ns, opts)) - -} - -// Create takes the representation of a mPIJob and creates it. Returns the server's representation of the mPIJob, and an error, if there is any. -func (c *FakeMPIJobs) Create(ctx context.Context, mPIJob *v1.MPIJob, opts metav1.CreateOptions) (result *v1.MPIJob, err error) { - emptyResult := &v1.MPIJob{} - obj, err := c.Fake. - Invokes(testing.NewCreateActionWithOptions(mpijobsResource, c.ns, mPIJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.MPIJob), err -} - -// Update takes the representation of a mPIJob and updates it. Returns the server's representation of the mPIJob, and an error, if there is any. -func (c *FakeMPIJobs) Update(ctx context.Context, mPIJob *v1.MPIJob, opts metav1.UpdateOptions) (result *v1.MPIJob, err error) { - emptyResult := &v1.MPIJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateActionWithOptions(mpijobsResource, c.ns, mPIJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.MPIJob), err -} - -// UpdateStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 
-func (c *FakeMPIJobs) UpdateStatus(ctx context.Context, mPIJob *v1.MPIJob, opts metav1.UpdateOptions) (result *v1.MPIJob, err error) { - emptyResult := &v1.MPIJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceActionWithOptions(mpijobsResource, "status", c.ns, mPIJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.MPIJob), err -} - -// Delete takes name of the mPIJob and deletes it. Returns an error if one occurs. -func (c *FakeMPIJobs) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { - _, err := c.Fake. - Invokes(testing.NewDeleteActionWithOptions(mpijobsResource, c.ns, name, opts), &v1.MPIJob{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakeMPIJobs) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { - action := testing.NewDeleteCollectionActionWithOptions(mpijobsResource, c.ns, opts, listOpts) - - _, err := c.Fake.Invokes(action, &v1.MPIJobList{}) - return err -} - -// Patch applies the patch and returns the patched mPIJob. -func (c *FakeMPIJobs) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.MPIJob, err error) { - emptyResult := &v1.MPIJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(mpijobsResource, c.ns, name, pt, data, opts, subresources...), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.MPIJob), err -} - -// Apply takes the given apply declarative configuration, applies it and returns the applied mPIJob. 
-func (c *FakeMPIJobs) Apply(ctx context.Context, mPIJob *kubefloworgv1.MPIJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.MPIJob, err error) { - if mPIJob == nil { - return nil, fmt.Errorf("mPIJob provided to Apply must not be nil") - } - data, err := json.Marshal(mPIJob) - if err != nil { - return nil, err - } - name := mPIJob.Name - if name == nil { - return nil, fmt.Errorf("mPIJob.Name must be provided to Apply") - } - emptyResult := &v1.MPIJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(mpijobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.MPIJob), err -} - -// ApplyStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). -func (c *FakeMPIJobs) ApplyStatus(ctx context.Context, mPIJob *kubefloworgv1.MPIJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.MPIJob, err error) { - if mPIJob == nil { - return nil, fmt.Errorf("mPIJob provided to Apply must not be nil") - } - data, err := json.Marshal(mPIJob) - if err != nil { - return nil, err - } - name := mPIJob.Name - if name == nil { - return nil, fmt.Errorf("mPIJob.Name must be provided to Apply") - } - emptyResult := &v1.MPIJob{} - obj, err := c.Fake. 
- Invokes(testing.NewPatchSubresourceActionWithOptions(mpijobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.MPIJob), err -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_paddlejob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_paddlejob.go deleted file mode 100644 index 39388f682a..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_paddlejob.go +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. 
- -package fake - -import ( - "context" - json "encoding/json" - "fmt" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" -) - -// FakePaddleJobs implements PaddleJobInterface -type FakePaddleJobs struct { - Fake *FakeKubeflowV1 - ns string -} - -var paddlejobsResource = v1.SchemeGroupVersion.WithResource("paddlejobs") - -var paddlejobsKind = v1.SchemeGroupVersion.WithKind("PaddleJob") - -// Get takes name of the paddleJob, and returns the corresponding paddleJob object, and an error if there is any. -func (c *FakePaddleJobs) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.PaddleJob, err error) { - emptyResult := &v1.PaddleJob{} - obj, err := c.Fake. - Invokes(testing.NewGetActionWithOptions(paddlejobsResource, c.ns, name, options), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PaddleJob), err -} - -// List takes label and field selectors, and returns the list of PaddleJobs that match those selectors. -func (c *FakePaddleJobs) List(ctx context.Context, opts metav1.ListOptions) (result *v1.PaddleJobList, err error) { - emptyResult := &v1.PaddleJobList{} - obj, err := c.Fake. 
- Invokes(testing.NewListActionWithOptions(paddlejobsResource, paddlejobsKind, c.ns, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1.PaddleJobList{ListMeta: obj.(*v1.PaddleJobList).ListMeta} - for _, item := range obj.(*v1.PaddleJobList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested paddleJobs. -func (c *FakePaddleJobs) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchActionWithOptions(paddlejobsResource, c.ns, opts)) - -} - -// Create takes the representation of a paddleJob and creates it. Returns the server's representation of the paddleJob, and an error, if there is any. -func (c *FakePaddleJobs) Create(ctx context.Context, paddleJob *v1.PaddleJob, opts metav1.CreateOptions) (result *v1.PaddleJob, err error) { - emptyResult := &v1.PaddleJob{} - obj, err := c.Fake. - Invokes(testing.NewCreateActionWithOptions(paddlejobsResource, c.ns, paddleJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PaddleJob), err -} - -// Update takes the representation of a paddleJob and updates it. Returns the server's representation of the paddleJob, and an error, if there is any. -func (c *FakePaddleJobs) Update(ctx context.Context, paddleJob *v1.PaddleJob, opts metav1.UpdateOptions) (result *v1.PaddleJob, err error) { - emptyResult := &v1.PaddleJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateActionWithOptions(paddlejobsResource, c.ns, paddleJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PaddleJob), err -} - -// UpdateStatus was generated because the type contains a Status member. 
-// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *FakePaddleJobs) UpdateStatus(ctx context.Context, paddleJob *v1.PaddleJob, opts metav1.UpdateOptions) (result *v1.PaddleJob, err error) { - emptyResult := &v1.PaddleJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceActionWithOptions(paddlejobsResource, "status", c.ns, paddleJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PaddleJob), err -} - -// Delete takes name of the paddleJob and deletes it. Returns an error if one occurs. -func (c *FakePaddleJobs) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { - _, err := c.Fake. - Invokes(testing.NewDeleteActionWithOptions(paddlejobsResource, c.ns, name, opts), &v1.PaddleJob{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakePaddleJobs) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { - action := testing.NewDeleteCollectionActionWithOptions(paddlejobsResource, c.ns, opts, listOpts) - - _, err := c.Fake.Invokes(action, &v1.PaddleJobList{}) - return err -} - -// Patch applies the patch and returns the patched paddleJob. -func (c *FakePaddleJobs) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.PaddleJob, err error) { - emptyResult := &v1.PaddleJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(paddlejobsResource, c.ns, name, pt, data, opts, subresources...), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PaddleJob), err -} - -// Apply takes the given apply declarative configuration, applies it and returns the applied paddleJob. 
-func (c *FakePaddleJobs) Apply(ctx context.Context, paddleJob *kubefloworgv1.PaddleJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.PaddleJob, err error) { - if paddleJob == nil { - return nil, fmt.Errorf("paddleJob provided to Apply must not be nil") - } - data, err := json.Marshal(paddleJob) - if err != nil { - return nil, err - } - name := paddleJob.Name - if name == nil { - return nil, fmt.Errorf("paddleJob.Name must be provided to Apply") - } - emptyResult := &v1.PaddleJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(paddlejobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PaddleJob), err -} - -// ApplyStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). -func (c *FakePaddleJobs) ApplyStatus(ctx context.Context, paddleJob *kubefloworgv1.PaddleJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.PaddleJob, err error) { - if paddleJob == nil { - return nil, fmt.Errorf("paddleJob provided to Apply must not be nil") - } - data, err := json.Marshal(paddleJob) - if err != nil { - return nil, err - } - name := paddleJob.Name - if name == nil { - return nil, fmt.Errorf("paddleJob.Name must be provided to Apply") - } - emptyResult := &v1.PaddleJob{} - obj, err := c.Fake. 
- Invokes(testing.NewPatchSubresourceActionWithOptions(paddlejobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PaddleJob), err -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_pytorchjob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_pytorchjob.go deleted file mode 100644 index 526d8d68ec..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_pytorchjob.go +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. 
- -package fake - -import ( - "context" - json "encoding/json" - "fmt" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" -) - -// FakePyTorchJobs implements PyTorchJobInterface -type FakePyTorchJobs struct { - Fake *FakeKubeflowV1 - ns string -} - -var pytorchjobsResource = v1.SchemeGroupVersion.WithResource("pytorchjobs") - -var pytorchjobsKind = v1.SchemeGroupVersion.WithKind("PyTorchJob") - -// Get takes name of the pyTorchJob, and returns the corresponding pyTorchJob object, and an error if there is any. -func (c *FakePyTorchJobs) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.PyTorchJob, err error) { - emptyResult := &v1.PyTorchJob{} - obj, err := c.Fake. - Invokes(testing.NewGetActionWithOptions(pytorchjobsResource, c.ns, name, options), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PyTorchJob), err -} - -// List takes label and field selectors, and returns the list of PyTorchJobs that match those selectors. -func (c *FakePyTorchJobs) List(ctx context.Context, opts metav1.ListOptions) (result *v1.PyTorchJobList, err error) { - emptyResult := &v1.PyTorchJobList{} - obj, err := c.Fake. 
- Invokes(testing.NewListActionWithOptions(pytorchjobsResource, pytorchjobsKind, c.ns, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1.PyTorchJobList{ListMeta: obj.(*v1.PyTorchJobList).ListMeta} - for _, item := range obj.(*v1.PyTorchJobList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested pyTorchJobs. -func (c *FakePyTorchJobs) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchActionWithOptions(pytorchjobsResource, c.ns, opts)) - -} - -// Create takes the representation of a pyTorchJob and creates it. Returns the server's representation of the pyTorchJob, and an error, if there is any. -func (c *FakePyTorchJobs) Create(ctx context.Context, pyTorchJob *v1.PyTorchJob, opts metav1.CreateOptions) (result *v1.PyTorchJob, err error) { - emptyResult := &v1.PyTorchJob{} - obj, err := c.Fake. - Invokes(testing.NewCreateActionWithOptions(pytorchjobsResource, c.ns, pyTorchJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PyTorchJob), err -} - -// Update takes the representation of a pyTorchJob and updates it. Returns the server's representation of the pyTorchJob, and an error, if there is any. -func (c *FakePyTorchJobs) Update(ctx context.Context, pyTorchJob *v1.PyTorchJob, opts metav1.UpdateOptions) (result *v1.PyTorchJob, err error) { - emptyResult := &v1.PyTorchJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateActionWithOptions(pytorchjobsResource, c.ns, pyTorchJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PyTorchJob), err -} - -// UpdateStatus was generated because the type contains a Status member. 
-// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *FakePyTorchJobs) UpdateStatus(ctx context.Context, pyTorchJob *v1.PyTorchJob, opts metav1.UpdateOptions) (result *v1.PyTorchJob, err error) { - emptyResult := &v1.PyTorchJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceActionWithOptions(pytorchjobsResource, "status", c.ns, pyTorchJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PyTorchJob), err -} - -// Delete takes name of the pyTorchJob and deletes it. Returns an error if one occurs. -func (c *FakePyTorchJobs) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { - _, err := c.Fake. - Invokes(testing.NewDeleteActionWithOptions(pytorchjobsResource, c.ns, name, opts), &v1.PyTorchJob{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakePyTorchJobs) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { - action := testing.NewDeleteCollectionActionWithOptions(pytorchjobsResource, c.ns, opts, listOpts) - - _, err := c.Fake.Invokes(action, &v1.PyTorchJobList{}) - return err -} - -// Patch applies the patch and returns the patched pyTorchJob. -func (c *FakePyTorchJobs) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.PyTorchJob, err error) { - emptyResult := &v1.PyTorchJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(pytorchjobsResource, c.ns, name, pt, data, opts, subresources...), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PyTorchJob), err -} - -// Apply takes the given apply declarative configuration, applies it and returns the applied pyTorchJob. 
-func (c *FakePyTorchJobs) Apply(ctx context.Context, pyTorchJob *kubefloworgv1.PyTorchJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.PyTorchJob, err error) { - if pyTorchJob == nil { - return nil, fmt.Errorf("pyTorchJob provided to Apply must not be nil") - } - data, err := json.Marshal(pyTorchJob) - if err != nil { - return nil, err - } - name := pyTorchJob.Name - if name == nil { - return nil, fmt.Errorf("pyTorchJob.Name must be provided to Apply") - } - emptyResult := &v1.PyTorchJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(pytorchjobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PyTorchJob), err -} - -// ApplyStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). -func (c *FakePyTorchJobs) ApplyStatus(ctx context.Context, pyTorchJob *kubefloworgv1.PyTorchJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.PyTorchJob, err error) { - if pyTorchJob == nil { - return nil, fmt.Errorf("pyTorchJob provided to Apply must not be nil") - } - data, err := json.Marshal(pyTorchJob) - if err != nil { - return nil, err - } - name := pyTorchJob.Name - if name == nil { - return nil, fmt.Errorf("pyTorchJob.Name must be provided to Apply") - } - emptyResult := &v1.PyTorchJob{} - obj, err := c.Fake. 
- Invokes(testing.NewPatchSubresourceActionWithOptions(pytorchjobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.PyTorchJob), err -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_tfjob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_tfjob.go deleted file mode 100644 index 3673e9265b..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_tfjob.go +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. 
- -package fake - -import ( - "context" - json "encoding/json" - "fmt" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" -) - -// FakeTFJobs implements TFJobInterface -type FakeTFJobs struct { - Fake *FakeKubeflowV1 - ns string -} - -var tfjobsResource = v1.SchemeGroupVersion.WithResource("tfjobs") - -var tfjobsKind = v1.SchemeGroupVersion.WithKind("TFJob") - -// Get takes name of the tFJob, and returns the corresponding tFJob object, and an error if there is any. -func (c *FakeTFJobs) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.TFJob, err error) { - emptyResult := &v1.TFJob{} - obj, err := c.Fake. - Invokes(testing.NewGetActionWithOptions(tfjobsResource, c.ns, name, options), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.TFJob), err -} - -// List takes label and field selectors, and returns the list of TFJobs that match those selectors. -func (c *FakeTFJobs) List(ctx context.Context, opts metav1.ListOptions) (result *v1.TFJobList, err error) { - emptyResult := &v1.TFJobList{} - obj, err := c.Fake. - Invokes(testing.NewListActionWithOptions(tfjobsResource, tfjobsKind, c.ns, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1.TFJobList{ListMeta: obj.(*v1.TFJobList).ListMeta} - for _, item := range obj.(*v1.TFJobList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested tFJobs. 
-func (c *FakeTFJobs) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchActionWithOptions(tfjobsResource, c.ns, opts)) - -} - -// Create takes the representation of a tFJob and creates it. Returns the server's representation of the tFJob, and an error, if there is any. -func (c *FakeTFJobs) Create(ctx context.Context, tFJob *v1.TFJob, opts metav1.CreateOptions) (result *v1.TFJob, err error) { - emptyResult := &v1.TFJob{} - obj, err := c.Fake. - Invokes(testing.NewCreateActionWithOptions(tfjobsResource, c.ns, tFJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.TFJob), err -} - -// Update takes the representation of a tFJob and updates it. Returns the server's representation of the tFJob, and an error, if there is any. -func (c *FakeTFJobs) Update(ctx context.Context, tFJob *v1.TFJob, opts metav1.UpdateOptions) (result *v1.TFJob, err error) { - emptyResult := &v1.TFJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateActionWithOptions(tfjobsResource, c.ns, tFJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.TFJob), err -} - -// UpdateStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *FakeTFJobs) UpdateStatus(ctx context.Context, tFJob *v1.TFJob, opts metav1.UpdateOptions) (result *v1.TFJob, err error) { - emptyResult := &v1.TFJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceActionWithOptions(tfjobsResource, "status", c.ns, tFJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.TFJob), err -} - -// Delete takes name of the tFJob and deletes it. Returns an error if one occurs. -func (c *FakeTFJobs) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { - _, err := c.Fake. 
- Invokes(testing.NewDeleteActionWithOptions(tfjobsResource, c.ns, name, opts), &v1.TFJob{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakeTFJobs) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { - action := testing.NewDeleteCollectionActionWithOptions(tfjobsResource, c.ns, opts, listOpts) - - _, err := c.Fake.Invokes(action, &v1.TFJobList{}) - return err -} - -// Patch applies the patch and returns the patched tFJob. -func (c *FakeTFJobs) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.TFJob, err error) { - emptyResult := &v1.TFJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(tfjobsResource, c.ns, name, pt, data, opts, subresources...), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.TFJob), err -} - -// Apply takes the given apply declarative configuration, applies it and returns the applied tFJob. -func (c *FakeTFJobs) Apply(ctx context.Context, tFJob *kubefloworgv1.TFJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.TFJob, err error) { - if tFJob == nil { - return nil, fmt.Errorf("tFJob provided to Apply must not be nil") - } - data, err := json.Marshal(tFJob) - if err != nil { - return nil, err - } - name := tFJob.Name - if name == nil { - return nil, fmt.Errorf("tFJob.Name must be provided to Apply") - } - emptyResult := &v1.TFJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(tfjobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.TFJob), err -} - -// ApplyStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). 
-func (c *FakeTFJobs) ApplyStatus(ctx context.Context, tFJob *kubefloworgv1.TFJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.TFJob, err error) { - if tFJob == nil { - return nil, fmt.Errorf("tFJob provided to Apply must not be nil") - } - data, err := json.Marshal(tFJob) - if err != nil { - return nil, err - } - name := tFJob.Name - if name == nil { - return nil, fmt.Errorf("tFJob.Name must be provided to Apply") - } - emptyResult := &v1.TFJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(tfjobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.TFJob), err -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_xgboostjob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_xgboostjob.go deleted file mode 100644 index ada50b3bc4..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/fake/fake_xgboostjob.go +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. 
- -package fake - -import ( - "context" - json "encoding/json" - "fmt" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" -) - -// FakeXGBoostJobs implements XGBoostJobInterface -type FakeXGBoostJobs struct { - Fake *FakeKubeflowV1 - ns string -} - -var xgboostjobsResource = v1.SchemeGroupVersion.WithResource("xgboostjobs") - -var xgboostjobsKind = v1.SchemeGroupVersion.WithKind("XGBoostJob") - -// Get takes name of the xGBoostJob, and returns the corresponding xGBoostJob object, and an error if there is any. -func (c *FakeXGBoostJobs) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.XGBoostJob, err error) { - emptyResult := &v1.XGBoostJob{} - obj, err := c.Fake. - Invokes(testing.NewGetActionWithOptions(xgboostjobsResource, c.ns, name, options), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.XGBoostJob), err -} - -// List takes label and field selectors, and returns the list of XGBoostJobs that match those selectors. -func (c *FakeXGBoostJobs) List(ctx context.Context, opts metav1.ListOptions) (result *v1.XGBoostJobList, err error) { - emptyResult := &v1.XGBoostJobList{} - obj, err := c.Fake. 
- Invokes(testing.NewListActionWithOptions(xgboostjobsResource, xgboostjobsKind, c.ns, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1.XGBoostJobList{ListMeta: obj.(*v1.XGBoostJobList).ListMeta} - for _, item := range obj.(*v1.XGBoostJobList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested xGBoostJobs. -func (c *FakeXGBoostJobs) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchActionWithOptions(xgboostjobsResource, c.ns, opts)) - -} - -// Create takes the representation of a xGBoostJob and creates it. Returns the server's representation of the xGBoostJob, and an error, if there is any. -func (c *FakeXGBoostJobs) Create(ctx context.Context, xGBoostJob *v1.XGBoostJob, opts metav1.CreateOptions) (result *v1.XGBoostJob, err error) { - emptyResult := &v1.XGBoostJob{} - obj, err := c.Fake. - Invokes(testing.NewCreateActionWithOptions(xgboostjobsResource, c.ns, xGBoostJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.XGBoostJob), err -} - -// Update takes the representation of a xGBoostJob and updates it. Returns the server's representation of the xGBoostJob, and an error, if there is any. -func (c *FakeXGBoostJobs) Update(ctx context.Context, xGBoostJob *v1.XGBoostJob, opts metav1.UpdateOptions) (result *v1.XGBoostJob, err error) { - emptyResult := &v1.XGBoostJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateActionWithOptions(xgboostjobsResource, c.ns, xGBoostJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.XGBoostJob), err -} - -// UpdateStatus was generated because the type contains a Status member. 
-// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *FakeXGBoostJobs) UpdateStatus(ctx context.Context, xGBoostJob *v1.XGBoostJob, opts metav1.UpdateOptions) (result *v1.XGBoostJob, err error) { - emptyResult := &v1.XGBoostJob{} - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceActionWithOptions(xgboostjobsResource, "status", c.ns, xGBoostJob, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.XGBoostJob), err -} - -// Delete takes name of the xGBoostJob and deletes it. Returns an error if one occurs. -func (c *FakeXGBoostJobs) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { - _, err := c.Fake. - Invokes(testing.NewDeleteActionWithOptions(xgboostjobsResource, c.ns, name, opts), &v1.XGBoostJob{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakeXGBoostJobs) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { - action := testing.NewDeleteCollectionActionWithOptions(xgboostjobsResource, c.ns, opts, listOpts) - - _, err := c.Fake.Invokes(action, &v1.XGBoostJobList{}) - return err -} - -// Patch applies the patch and returns the patched xGBoostJob. -func (c *FakeXGBoostJobs) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.XGBoostJob, err error) { - emptyResult := &v1.XGBoostJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(xgboostjobsResource, c.ns, name, pt, data, opts, subresources...), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.XGBoostJob), err -} - -// Apply takes the given apply declarative configuration, applies it and returns the applied xGBoostJob. 
-func (c *FakeXGBoostJobs) Apply(ctx context.Context, xGBoostJob *kubefloworgv1.XGBoostJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.XGBoostJob, err error) { - if xGBoostJob == nil { - return nil, fmt.Errorf("xGBoostJob provided to Apply must not be nil") - } - data, err := json.Marshal(xGBoostJob) - if err != nil { - return nil, err - } - name := xGBoostJob.Name - if name == nil { - return nil, fmt.Errorf("xGBoostJob.Name must be provided to Apply") - } - emptyResult := &v1.XGBoostJob{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(xgboostjobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.XGBoostJob), err -} - -// ApplyStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). -func (c *FakeXGBoostJobs) ApplyStatus(ctx context.Context, xGBoostJob *kubefloworgv1.XGBoostJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.XGBoostJob, err error) { - if xGBoostJob == nil { - return nil, fmt.Errorf("xGBoostJob provided to Apply must not be nil") - } - data, err := json.Marshal(xGBoostJob) - if err != nil { - return nil, err - } - name := xGBoostJob.Name - if name == nil { - return nil, fmt.Errorf("xGBoostJob.Name must be provided to Apply") - } - emptyResult := &v1.XGBoostJob{} - obj, err := c.Fake. 
- Invokes(testing.NewPatchSubresourceActionWithOptions(xgboostjobsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1.XGBoostJob), err -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/generated_expansion.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/generated_expansion.go deleted file mode 100644 index 5cd0ff8041..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/generated_expansion.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -type JAXJobExpansion interface{} - -type MPIJobExpansion interface{} - -type PaddleJobExpansion interface{} - -type PyTorchJobExpansion interface{} - -type TFJobExpansion interface{} - -type XGBoostJobExpansion interface{} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/jaxjob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/jaxjob.go deleted file mode 100644 index b70a281b06..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/jaxjob.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - scheme "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/scheme" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - gentype "k8s.io/client-go/gentype" -) - -// JAXJobsGetter has a method to return a JAXJobInterface. -// A group's client should implement this interface. -type JAXJobsGetter interface { - JAXJobs(namespace string) JAXJobInterface -} - -// JAXJobInterface has methods to work with JAXJob resources. -type JAXJobInterface interface { - Create(ctx context.Context, jAXJob *v1.JAXJob, opts metav1.CreateOptions) (*v1.JAXJob, error) - Update(ctx context.Context, jAXJob *v1.JAXJob, opts metav1.UpdateOptions) (*v1.JAXJob, error) - // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 
- UpdateStatus(ctx context.Context, jAXJob *v1.JAXJob, opts metav1.UpdateOptions) (*v1.JAXJob, error) - Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error - DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error - Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.JAXJob, error) - List(ctx context.Context, opts metav1.ListOptions) (*v1.JAXJobList, error) - Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.JAXJob, err error) - Apply(ctx context.Context, jAXJob *kubefloworgv1.JAXJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.JAXJob, err error) - // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). - ApplyStatus(ctx context.Context, jAXJob *kubefloworgv1.JAXJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.JAXJob, err error) - JAXJobExpansion -} - -// jAXJobs implements JAXJobInterface -type jAXJobs struct { - *gentype.ClientWithListAndApply[*v1.JAXJob, *v1.JAXJobList, *kubefloworgv1.JAXJobApplyConfiguration] -} - -// newJAXJobs returns a JAXJobs -func newJAXJobs(c *KubeflowV1Client, namespace string) *jAXJobs { - return &jAXJobs{ - gentype.NewClientWithListAndApply[*v1.JAXJob, *v1.JAXJobList, *kubefloworgv1.JAXJobApplyConfiguration]( - "jaxjobs", - c.RESTClient(), - scheme.ParameterCodec, - namespace, - func() *v1.JAXJob { return &v1.JAXJob{} }, - func() *v1.JAXJobList { return &v1.JAXJobList{} }), - } -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/kubeflow.org_client.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/kubeflow.org_client.go deleted file mode 100644 index 10ab62757e..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/kubeflow.org_client.go +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2024 The Kubeflow 
Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -import ( - "net/http" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/scheme" - rest "k8s.io/client-go/rest" -) - -type KubeflowV1Interface interface { - RESTClient() rest.Interface - JAXJobsGetter - MPIJobsGetter - PaddleJobsGetter - PyTorchJobsGetter - TFJobsGetter - XGBoostJobsGetter -} - -// KubeflowV1Client is used to interact with features provided by the kubeflow.org group. -type KubeflowV1Client struct { - restClient rest.Interface -} - -func (c *KubeflowV1Client) JAXJobs(namespace string) JAXJobInterface { - return newJAXJobs(c, namespace) -} - -func (c *KubeflowV1Client) MPIJobs(namespace string) MPIJobInterface { - return newMPIJobs(c, namespace) -} - -func (c *KubeflowV1Client) PaddleJobs(namespace string) PaddleJobInterface { - return newPaddleJobs(c, namespace) -} - -func (c *KubeflowV1Client) PyTorchJobs(namespace string) PyTorchJobInterface { - return newPyTorchJobs(c, namespace) -} - -func (c *KubeflowV1Client) TFJobs(namespace string) TFJobInterface { - return newTFJobs(c, namespace) -} - -func (c *KubeflowV1Client) XGBoostJobs(namespace string) XGBoostJobInterface { - return newXGBoostJobs(c, namespace) -} - -// NewForConfig creates a new KubeflowV1Client for the given config. 
-// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient), -// where httpClient was generated with rest.HTTPClientFor(c). -func NewForConfig(c *rest.Config) (*KubeflowV1Client, error) { - config := *c - if err := setConfigDefaults(&config); err != nil { - return nil, err - } - httpClient, err := rest.HTTPClientFor(&config) - if err != nil { - return nil, err - } - return NewForConfigAndClient(&config, httpClient) -} - -// NewForConfigAndClient creates a new KubeflowV1Client for the given config and http client. -// Note the http client provided takes precedence over the configured transport values. -func NewForConfigAndClient(c *rest.Config, h *http.Client) (*KubeflowV1Client, error) { - config := *c - if err := setConfigDefaults(&config); err != nil { - return nil, err - } - client, err := rest.RESTClientForConfigAndClient(&config, h) - if err != nil { - return nil, err - } - return &KubeflowV1Client{client}, nil -} - -// NewForConfigOrDie creates a new KubeflowV1Client for the given config and -// panics if there is an error in the config. -func NewForConfigOrDie(c *rest.Config) *KubeflowV1Client { - client, err := NewForConfig(c) - if err != nil { - panic(err) - } - return client -} - -// New creates a new KubeflowV1Client for the given RESTClient. -func New(c rest.Interface) *KubeflowV1Client { - return &KubeflowV1Client{c} -} - -func setConfigDefaults(config *rest.Config) error { - gv := v1.SchemeGroupVersion - config.GroupVersion = &gv - config.APIPath = "/apis" - config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() - - if config.UserAgent == "" { - config.UserAgent = rest.DefaultKubernetesUserAgent() - } - - return nil -} - -// RESTClient returns a RESTClient that is used to communicate -// with API server by this client implementation. 
-func (c *KubeflowV1Client) RESTClient() rest.Interface { - if c == nil { - return nil - } - return c.restClient -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/mpijob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/mpijob.go deleted file mode 100644 index 79e6a6e06a..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/mpijob.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - scheme "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/scheme" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - gentype "k8s.io/client-go/gentype" -) - -// MPIJobsGetter has a method to return a MPIJobInterface. -// A group's client should implement this interface. -type MPIJobsGetter interface { - MPIJobs(namespace string) MPIJobInterface -} - -// MPIJobInterface has methods to work with MPIJob resources. 
-type MPIJobInterface interface { - Create(ctx context.Context, mPIJob *v1.MPIJob, opts metav1.CreateOptions) (*v1.MPIJob, error) - Update(ctx context.Context, mPIJob *v1.MPIJob, opts metav1.UpdateOptions) (*v1.MPIJob, error) - // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). - UpdateStatus(ctx context.Context, mPIJob *v1.MPIJob, opts metav1.UpdateOptions) (*v1.MPIJob, error) - Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error - DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error - Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.MPIJob, error) - List(ctx context.Context, opts metav1.ListOptions) (*v1.MPIJobList, error) - Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.MPIJob, err error) - Apply(ctx context.Context, mPIJob *kubefloworgv1.MPIJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.MPIJob, err error) - // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). 
- ApplyStatus(ctx context.Context, mPIJob *kubefloworgv1.MPIJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.MPIJob, err error) - MPIJobExpansion -} - -// mPIJobs implements MPIJobInterface -type mPIJobs struct { - *gentype.ClientWithListAndApply[*v1.MPIJob, *v1.MPIJobList, *kubefloworgv1.MPIJobApplyConfiguration] -} - -// newMPIJobs returns a MPIJobs -func newMPIJobs(c *KubeflowV1Client, namespace string) *mPIJobs { - return &mPIJobs{ - gentype.NewClientWithListAndApply[*v1.MPIJob, *v1.MPIJobList, *kubefloworgv1.MPIJobApplyConfiguration]( - "mpijobs", - c.RESTClient(), - scheme.ParameterCodec, - namespace, - func() *v1.MPIJob { return &v1.MPIJob{} }, - func() *v1.MPIJobList { return &v1.MPIJobList{} }), - } -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/paddlejob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/paddlejob.go deleted file mode 100644 index 19c6cd5f63..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/paddlejob.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. 
- -package v1 - -import ( - "context" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - scheme "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/scheme" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - gentype "k8s.io/client-go/gentype" -) - -// PaddleJobsGetter has a method to return a PaddleJobInterface. -// A group's client should implement this interface. -type PaddleJobsGetter interface { - PaddleJobs(namespace string) PaddleJobInterface -} - -// PaddleJobInterface has methods to work with PaddleJob resources. -type PaddleJobInterface interface { - Create(ctx context.Context, paddleJob *v1.PaddleJob, opts metav1.CreateOptions) (*v1.PaddleJob, error) - Update(ctx context.Context, paddleJob *v1.PaddleJob, opts metav1.UpdateOptions) (*v1.PaddleJob, error) - // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 
- UpdateStatus(ctx context.Context, paddleJob *v1.PaddleJob, opts metav1.UpdateOptions) (*v1.PaddleJob, error) - Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error - DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error - Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.PaddleJob, error) - List(ctx context.Context, opts metav1.ListOptions) (*v1.PaddleJobList, error) - Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.PaddleJob, err error) - Apply(ctx context.Context, paddleJob *kubefloworgv1.PaddleJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.PaddleJob, err error) - // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). - ApplyStatus(ctx context.Context, paddleJob *kubefloworgv1.PaddleJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.PaddleJob, err error) - PaddleJobExpansion -} - -// paddleJobs implements PaddleJobInterface -type paddleJobs struct { - *gentype.ClientWithListAndApply[*v1.PaddleJob, *v1.PaddleJobList, *kubefloworgv1.PaddleJobApplyConfiguration] -} - -// newPaddleJobs returns a PaddleJobs -func newPaddleJobs(c *KubeflowV1Client, namespace string) *paddleJobs { - return &paddleJobs{ - gentype.NewClientWithListAndApply[*v1.PaddleJob, *v1.PaddleJobList, *kubefloworgv1.PaddleJobApplyConfiguration]( - "paddlejobs", - c.RESTClient(), - scheme.ParameterCodec, - namespace, - func() *v1.PaddleJob { return &v1.PaddleJob{} }, - func() *v1.PaddleJobList { return &v1.PaddleJobList{} }), - } -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/pytorchjob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/pytorchjob.go deleted file mode 100644 index 042c0e2e59..0000000000 --- 
a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/pytorchjob.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - scheme "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/scheme" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - gentype "k8s.io/client-go/gentype" -) - -// PyTorchJobsGetter has a method to return a PyTorchJobInterface. -// A group's client should implement this interface. -type PyTorchJobsGetter interface { - PyTorchJobs(namespace string) PyTorchJobInterface -} - -// PyTorchJobInterface has methods to work with PyTorchJob resources. -type PyTorchJobInterface interface { - Create(ctx context.Context, pyTorchJob *v1.PyTorchJob, opts metav1.CreateOptions) (*v1.PyTorchJob, error) - Update(ctx context.Context, pyTorchJob *v1.PyTorchJob, opts metav1.UpdateOptions) (*v1.PyTorchJob, error) - // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 
- UpdateStatus(ctx context.Context, pyTorchJob *v1.PyTorchJob, opts metav1.UpdateOptions) (*v1.PyTorchJob, error) - Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error - DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error - Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.PyTorchJob, error) - List(ctx context.Context, opts metav1.ListOptions) (*v1.PyTorchJobList, error) - Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.PyTorchJob, err error) - Apply(ctx context.Context, pyTorchJob *kubefloworgv1.PyTorchJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.PyTorchJob, err error) - // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). - ApplyStatus(ctx context.Context, pyTorchJob *kubefloworgv1.PyTorchJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.PyTorchJob, err error) - PyTorchJobExpansion -} - -// pyTorchJobs implements PyTorchJobInterface -type pyTorchJobs struct { - *gentype.ClientWithListAndApply[*v1.PyTorchJob, *v1.PyTorchJobList, *kubefloworgv1.PyTorchJobApplyConfiguration] -} - -// newPyTorchJobs returns a PyTorchJobs -func newPyTorchJobs(c *KubeflowV1Client, namespace string) *pyTorchJobs { - return &pyTorchJobs{ - gentype.NewClientWithListAndApply[*v1.PyTorchJob, *v1.PyTorchJobList, *kubefloworgv1.PyTorchJobApplyConfiguration]( - "pytorchjobs", - c.RESTClient(), - scheme.ParameterCodec, - namespace, - func() *v1.PyTorchJob { return &v1.PyTorchJob{} }, - func() *v1.PyTorchJobList { return &v1.PyTorchJobList{} }), - } -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/tfjob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/tfjob.go deleted file mode 100644 index b69cf60bb4..0000000000 --- 
a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/tfjob.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - scheme "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/scheme" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - gentype "k8s.io/client-go/gentype" -) - -// TFJobsGetter has a method to return a TFJobInterface. -// A group's client should implement this interface. -type TFJobsGetter interface { - TFJobs(namespace string) TFJobInterface -} - -// TFJobInterface has methods to work with TFJob resources. -type TFJobInterface interface { - Create(ctx context.Context, tFJob *v1.TFJob, opts metav1.CreateOptions) (*v1.TFJob, error) - Update(ctx context.Context, tFJob *v1.TFJob, opts metav1.UpdateOptions) (*v1.TFJob, error) - // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 
- UpdateStatus(ctx context.Context, tFJob *v1.TFJob, opts metav1.UpdateOptions) (*v1.TFJob, error) - Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error - DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error - Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.TFJob, error) - List(ctx context.Context, opts metav1.ListOptions) (*v1.TFJobList, error) - Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.TFJob, err error) - Apply(ctx context.Context, tFJob *kubefloworgv1.TFJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.TFJob, err error) - // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). - ApplyStatus(ctx context.Context, tFJob *kubefloworgv1.TFJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.TFJob, err error) - TFJobExpansion -} - -// tFJobs implements TFJobInterface -type tFJobs struct { - *gentype.ClientWithListAndApply[*v1.TFJob, *v1.TFJobList, *kubefloworgv1.TFJobApplyConfiguration] -} - -// newTFJobs returns a TFJobs -func newTFJobs(c *KubeflowV1Client, namespace string) *tFJobs { - return &tFJobs{ - gentype.NewClientWithListAndApply[*v1.TFJob, *v1.TFJobList, *kubefloworgv1.TFJobApplyConfiguration]( - "tfjobs", - c.RESTClient(), - scheme.ParameterCodec, - namespace, - func() *v1.TFJob { return &v1.TFJob{} }, - func() *v1.TFJobList { return &v1.TFJobList{} }), - } -} diff --git a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/xgboostjob.go b/pkg/client/clientset/versioned/typed/kubeflow.org/v1/xgboostjob.go deleted file mode 100644 index e04ff3fe22..0000000000 --- a/pkg/client/clientset/versioned/typed/kubeflow.org/v1/xgboostjob.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 
2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/client/applyconfiguration/kubeflow.org/v1" - scheme "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/scheme" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - gentype "k8s.io/client-go/gentype" -) - -// XGBoostJobsGetter has a method to return a XGBoostJobInterface. -// A group's client should implement this interface. -type XGBoostJobsGetter interface { - XGBoostJobs(namespace string) XGBoostJobInterface -} - -// XGBoostJobInterface has methods to work with XGBoostJob resources. -type XGBoostJobInterface interface { - Create(ctx context.Context, xGBoostJob *v1.XGBoostJob, opts metav1.CreateOptions) (*v1.XGBoostJob, error) - Update(ctx context.Context, xGBoostJob *v1.XGBoostJob, opts metav1.UpdateOptions) (*v1.XGBoostJob, error) - // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 
- UpdateStatus(ctx context.Context, xGBoostJob *v1.XGBoostJob, opts metav1.UpdateOptions) (*v1.XGBoostJob, error) - Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error - DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error - Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.XGBoostJob, error) - List(ctx context.Context, opts metav1.ListOptions) (*v1.XGBoostJobList, error) - Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.XGBoostJob, err error) - Apply(ctx context.Context, xGBoostJob *kubefloworgv1.XGBoostJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.XGBoostJob, err error) - // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). - ApplyStatus(ctx context.Context, xGBoostJob *kubefloworgv1.XGBoostJobApplyConfiguration, opts metav1.ApplyOptions) (result *v1.XGBoostJob, err error) - XGBoostJobExpansion -} - -// xGBoostJobs implements XGBoostJobInterface -type xGBoostJobs struct { - *gentype.ClientWithListAndApply[*v1.XGBoostJob, *v1.XGBoostJobList, *kubefloworgv1.XGBoostJobApplyConfiguration] -} - -// newXGBoostJobs returns a XGBoostJobs -func newXGBoostJobs(c *KubeflowV1Client, namespace string) *xGBoostJobs { - return &xGBoostJobs{ - gentype.NewClientWithListAndApply[*v1.XGBoostJob, *v1.XGBoostJobList, *kubefloworgv1.XGBoostJobApplyConfiguration]( - "xgboostjobs", - c.RESTClient(), - scheme.ParameterCodec, - namespace, - func() *v1.XGBoostJob { return &v1.XGBoostJob{} }, - func() *v1.XGBoostJobList { return &v1.XGBoostJobList{} }), - } -} diff --git a/pkg/client/informers/externalversions/generic.go b/pkg/client/informers/externalversions/generic.go index 76f3d9452f..579930a2db 100644 --- a/pkg/client/informers/externalversions/generic.go +++ 
b/pkg/client/informers/externalversions/generic.go @@ -19,7 +19,6 @@ package externalversions import ( "fmt" - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" v2alpha1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" schema "k8s.io/apimachinery/pkg/runtime/schema" cache "k8s.io/client-go/tools/cache" @@ -51,21 +50,7 @@ func (f *genericInformer) Lister() cache.GenericLister { // TODO extend this to unknown resources with a client pool func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) { switch resource { - // Group=kubeflow.org, Version=v1 - case v1.SchemeGroupVersion.WithResource("jaxjobs"): - return &genericInformer{resource: resource.GroupResource(), informer: f.Kubeflow().V1().JAXJobs().Informer()}, nil - case v1.SchemeGroupVersion.WithResource("mpijobs"): - return &genericInformer{resource: resource.GroupResource(), informer: f.Kubeflow().V1().MPIJobs().Informer()}, nil - case v1.SchemeGroupVersion.WithResource("paddlejobs"): - return &genericInformer{resource: resource.GroupResource(), informer: f.Kubeflow().V1().PaddleJobs().Informer()}, nil - case v1.SchemeGroupVersion.WithResource("pytorchjobs"): - return &genericInformer{resource: resource.GroupResource(), informer: f.Kubeflow().V1().PyTorchJobs().Informer()}, nil - case v1.SchemeGroupVersion.WithResource("tfjobs"): - return &genericInformer{resource: resource.GroupResource(), informer: f.Kubeflow().V1().TFJobs().Informer()}, nil - case v1.SchemeGroupVersion.WithResource("xgboostjobs"): - return &genericInformer{resource: resource.GroupResource(), informer: f.Kubeflow().V1().XGBoostJobs().Informer()}, nil - - // Group=kubeflow.org, Version=v2alpha1 + // Group=kubeflow.org, Version=v2alpha1 case v2alpha1.SchemeGroupVersion.WithResource("clustertrainingruntimes"): return &genericInformer{resource: resource.GroupResource(), informer: f.Kubeflow().V2alpha1().ClusterTrainingRuntimes().Informer()}, nil case 
v2alpha1.SchemeGroupVersion.WithResource("trainjobs"): diff --git a/pkg/client/informers/externalversions/kubeflow.org/interface.go b/pkg/client/informers/externalversions/kubeflow.org/interface.go index 7bdf46abd8..92bf00700c 100644 --- a/pkg/client/informers/externalversions/kubeflow.org/interface.go +++ b/pkg/client/informers/externalversions/kubeflow.org/interface.go @@ -18,14 +18,11 @@ package kubeflow import ( internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/kubeflow.org/v1" v2alpha1 "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/kubeflow.org/v2alpha1" ) // Interface provides access to each of this group's versions. type Interface interface { - // V1 provides access to shared informers for resources in V1. - V1() v1.Interface // V2alpha1 provides access to shared informers for resources in V2alpha1. V2alpha1() v2alpha1.Interface } @@ -41,11 +38,6 @@ func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakList return &group{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} } -// V1 returns a new v1.Interface. -func (g *group) V1() v1.Interface { - return v1.New(g.factory, g.namespace, g.tweakListOptions) -} - // V2alpha1 returns a new v2alpha1.Interface. func (g *group) V2alpha1() v2alpha1.Interface { return v2alpha1.New(g.factory, g.namespace, g.tweakListOptions) diff --git a/pkg/client/informers/externalversions/kubeflow.org/v1/interface.go b/pkg/client/informers/externalversions/kubeflow.org/v1/interface.go deleted file mode 100644 index c0e80ebcb8..0000000000 --- a/pkg/client/informers/externalversions/kubeflow.org/v1/interface.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. - -package v1 - -import ( - internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" -) - -// Interface provides access to all the informers in this group version. -type Interface interface { - // JAXJobs returns a JAXJobInformer. - JAXJobs() JAXJobInformer - // MPIJobs returns a MPIJobInformer. - MPIJobs() MPIJobInformer - // PaddleJobs returns a PaddleJobInformer. - PaddleJobs() PaddleJobInformer - // PyTorchJobs returns a PyTorchJobInformer. - PyTorchJobs() PyTorchJobInformer - // TFJobs returns a TFJobInformer. - TFJobs() TFJobInformer - // XGBoostJobs returns a XGBoostJobInformer. - XGBoostJobs() XGBoostJobInformer -} - -type version struct { - factory internalinterfaces.SharedInformerFactory - namespace string - tweakListOptions internalinterfaces.TweakListOptionsFunc -} - -// New returns a new Interface. -func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { - return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} -} - -// JAXJobs returns a JAXJobInformer. -func (v *version) JAXJobs() JAXJobInformer { - return &jAXJobInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} -} - -// MPIJobs returns a MPIJobInformer. 
-func (v *version) MPIJobs() MPIJobInformer { - return &mPIJobInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} -} - -// PaddleJobs returns a PaddleJobInformer. -func (v *version) PaddleJobs() PaddleJobInformer { - return &paddleJobInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} -} - -// PyTorchJobs returns a PyTorchJobInformer. -func (v *version) PyTorchJobs() PyTorchJobInformer { - return &pyTorchJobInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} -} - -// TFJobs returns a TFJobInformer. -func (v *version) TFJobs() TFJobInformer { - return &tFJobInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} -} - -// XGBoostJobs returns a XGBoostJobInformer. -func (v *version) XGBoostJobs() XGBoostJobInformer { - return &xGBoostJobInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} -} diff --git a/pkg/client/informers/externalversions/kubeflow.org/v1/jaxjob.go b/pkg/client/informers/externalversions/kubeflow.org/v1/jaxjob.go deleted file mode 100644 index 91504f6351..0000000000 --- a/pkg/client/informers/externalversions/kubeflow.org/v1/jaxjob.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. 
- -package v1 - -import ( - "context" - time "time" - - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - versioned "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/training-operator/pkg/client/listers/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - watch "k8s.io/apimachinery/pkg/watch" - cache "k8s.io/client-go/tools/cache" -) - -// JAXJobInformer provides access to a shared informer and lister for -// JAXJobs. -type JAXJobInformer interface { - Informer() cache.SharedIndexInformer - Lister() v1.JAXJobLister -} - -type jAXJobInformer struct { - factory internalinterfaces.SharedInformerFactory - tweakListOptions internalinterfaces.TweakListOptionsFunc - namespace string -} - -// NewJAXJobInformer constructs a new informer for JAXJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. -func NewJAXJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { - return NewFilteredJAXJobInformer(client, namespace, resyncPeriod, indexers, nil) -} - -// NewFilteredJAXJobInformer constructs a new informer for JAXJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. 
-func NewFilteredJAXJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { - return cache.NewSharedIndexInformer( - &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().JAXJobs(namespace).List(context.TODO(), options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().JAXJobs(namespace).Watch(context.TODO(), options) - }, - }, - &kubefloworgv1.JAXJob{}, - resyncPeriod, - indexers, - ) -} - -func (f *jAXJobInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { - return NewFilteredJAXJobInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) -} - -func (f *jAXJobInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&kubefloworgv1.JAXJob{}, f.defaultInformer) -} - -func (f *jAXJobInformer) Lister() v1.JAXJobLister { - return v1.NewJAXJobLister(f.Informer().GetIndexer()) -} diff --git a/pkg/client/informers/externalversions/kubeflow.org/v1/mpijob.go b/pkg/client/informers/externalversions/kubeflow.org/v1/mpijob.go deleted file mode 100644 index 35291b6ae8..0000000000 --- a/pkg/client/informers/externalversions/kubeflow.org/v1/mpijob.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - time "time" - - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - versioned "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/training-operator/pkg/client/listers/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - watch "k8s.io/apimachinery/pkg/watch" - cache "k8s.io/client-go/tools/cache" -) - -// MPIJobInformer provides access to a shared informer and lister for -// MPIJobs. -type MPIJobInformer interface { - Informer() cache.SharedIndexInformer - Lister() v1.MPIJobLister -} - -type mPIJobInformer struct { - factory internalinterfaces.SharedInformerFactory - tweakListOptions internalinterfaces.TweakListOptionsFunc - namespace string -} - -// NewMPIJobInformer constructs a new informer for MPIJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. -func NewMPIJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { - return NewFilteredMPIJobInformer(client, namespace, resyncPeriod, indexers, nil) -} - -// NewFilteredMPIJobInformer constructs a new informer for MPIJob type. 
-// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. -func NewFilteredMPIJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { - return cache.NewSharedIndexInformer( - &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().MPIJobs(namespace).List(context.TODO(), options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().MPIJobs(namespace).Watch(context.TODO(), options) - }, - }, - &kubefloworgv1.MPIJob{}, - resyncPeriod, - indexers, - ) -} - -func (f *mPIJobInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { - return NewFilteredMPIJobInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) -} - -func (f *mPIJobInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&kubefloworgv1.MPIJob{}, f.defaultInformer) -} - -func (f *mPIJobInformer) Lister() v1.MPIJobLister { - return v1.NewMPIJobLister(f.Informer().GetIndexer()) -} diff --git a/pkg/client/informers/externalversions/kubeflow.org/v1/paddlejob.go b/pkg/client/informers/externalversions/kubeflow.org/v1/paddlejob.go deleted file mode 100644 index fcef538446..0000000000 --- a/pkg/client/informers/externalversions/kubeflow.org/v1/paddlejob.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - time "time" - - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - versioned "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/training-operator/pkg/client/listers/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - watch "k8s.io/apimachinery/pkg/watch" - cache "k8s.io/client-go/tools/cache" -) - -// PaddleJobInformer provides access to a shared informer and lister for -// PaddleJobs. -type PaddleJobInformer interface { - Informer() cache.SharedIndexInformer - Lister() v1.PaddleJobLister -} - -type paddleJobInformer struct { - factory internalinterfaces.SharedInformerFactory - tweakListOptions internalinterfaces.TweakListOptionsFunc - namespace string -} - -// NewPaddleJobInformer constructs a new informer for PaddleJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. 
-func NewPaddleJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { - return NewFilteredPaddleJobInformer(client, namespace, resyncPeriod, indexers, nil) -} - -// NewFilteredPaddleJobInformer constructs a new informer for PaddleJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. -func NewFilteredPaddleJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { - return cache.NewSharedIndexInformer( - &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().PaddleJobs(namespace).List(context.TODO(), options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().PaddleJobs(namespace).Watch(context.TODO(), options) - }, - }, - &kubefloworgv1.PaddleJob{}, - resyncPeriod, - indexers, - ) -} - -func (f *paddleJobInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { - return NewFilteredPaddleJobInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) -} - -func (f *paddleJobInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&kubefloworgv1.PaddleJob{}, f.defaultInformer) -} - -func (f *paddleJobInformer) Lister() v1.PaddleJobLister { - return v1.NewPaddleJobLister(f.Informer().GetIndexer()) -} diff --git a/pkg/client/informers/externalversions/kubeflow.org/v1/pytorchjob.go 
b/pkg/client/informers/externalversions/kubeflow.org/v1/pytorchjob.go deleted file mode 100644 index 7e94876e7b..0000000000 --- a/pkg/client/informers/externalversions/kubeflow.org/v1/pytorchjob.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - time "time" - - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - versioned "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/training-operator/pkg/client/listers/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - watch "k8s.io/apimachinery/pkg/watch" - cache "k8s.io/client-go/tools/cache" -) - -// PyTorchJobInformer provides access to a shared informer and lister for -// PyTorchJobs. -type PyTorchJobInformer interface { - Informer() cache.SharedIndexInformer - Lister() v1.PyTorchJobLister -} - -type pyTorchJobInformer struct { - factory internalinterfaces.SharedInformerFactory - tweakListOptions internalinterfaces.TweakListOptionsFunc - namespace string -} - -// NewPyTorchJobInformer constructs a new informer for PyTorchJob type. 
-// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. -func NewPyTorchJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { - return NewFilteredPyTorchJobInformer(client, namespace, resyncPeriod, indexers, nil) -} - -// NewFilteredPyTorchJobInformer constructs a new informer for PyTorchJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. -func NewFilteredPyTorchJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { - return cache.NewSharedIndexInformer( - &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().PyTorchJobs(namespace).List(context.TODO(), options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().PyTorchJobs(namespace).Watch(context.TODO(), options) - }, - }, - &kubefloworgv1.PyTorchJob{}, - resyncPeriod, - indexers, - ) -} - -func (f *pyTorchJobInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { - return NewFilteredPyTorchJobInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) -} - -func (f *pyTorchJobInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&kubefloworgv1.PyTorchJob{}, f.defaultInformer) -} - -func (f *pyTorchJobInformer) Lister() 
v1.PyTorchJobLister { - return v1.NewPyTorchJobLister(f.Informer().GetIndexer()) -} diff --git a/pkg/client/informers/externalversions/kubeflow.org/v1/tfjob.go b/pkg/client/informers/externalversions/kubeflow.org/v1/tfjob.go deleted file mode 100644 index b97554f91b..0000000000 --- a/pkg/client/informers/externalversions/kubeflow.org/v1/tfjob.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - time "time" - - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - versioned "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/training-operator/pkg/client/listers/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - watch "k8s.io/apimachinery/pkg/watch" - cache "k8s.io/client-go/tools/cache" -) - -// TFJobInformer provides access to a shared informer and lister for -// TFJobs. 
-type TFJobInformer interface { - Informer() cache.SharedIndexInformer - Lister() v1.TFJobLister -} - -type tFJobInformer struct { - factory internalinterfaces.SharedInformerFactory - tweakListOptions internalinterfaces.TweakListOptionsFunc - namespace string -} - -// NewTFJobInformer constructs a new informer for TFJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. -func NewTFJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { - return NewFilteredTFJobInformer(client, namespace, resyncPeriod, indexers, nil) -} - -// NewFilteredTFJobInformer constructs a new informer for TFJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. 
-func NewFilteredTFJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { - return cache.NewSharedIndexInformer( - &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().TFJobs(namespace).List(context.TODO(), options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().TFJobs(namespace).Watch(context.TODO(), options) - }, - }, - &kubefloworgv1.TFJob{}, - resyncPeriod, - indexers, - ) -} - -func (f *tFJobInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { - return NewFilteredTFJobInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) -} - -func (f *tFJobInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&kubefloworgv1.TFJob{}, f.defaultInformer) -} - -func (f *tFJobInformer) Lister() v1.TFJobLister { - return v1.NewTFJobLister(f.Informer().GetIndexer()) -} diff --git a/pkg/client/informers/externalversions/kubeflow.org/v1/xgboostjob.go b/pkg/client/informers/externalversions/kubeflow.org/v1/xgboostjob.go deleted file mode 100644 index a0344d59ec..0000000000 --- a/pkg/client/informers/externalversions/kubeflow.org/v1/xgboostjob.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - time "time" - - kubefloworgv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - versioned "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/training-operator/pkg/client/listers/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - watch "k8s.io/apimachinery/pkg/watch" - cache "k8s.io/client-go/tools/cache" -) - -// XGBoostJobInformer provides access to a shared informer and lister for -// XGBoostJobs. -type XGBoostJobInformer interface { - Informer() cache.SharedIndexInformer - Lister() v1.XGBoostJobLister -} - -type xGBoostJobInformer struct { - factory internalinterfaces.SharedInformerFactory - tweakListOptions internalinterfaces.TweakListOptionsFunc - namespace string -} - -// NewXGBoostJobInformer constructs a new informer for XGBoostJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. 
-func NewXGBoostJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { - return NewFilteredXGBoostJobInformer(client, namespace, resyncPeriod, indexers, nil) -} - -// NewFilteredXGBoostJobInformer constructs a new informer for XGBoostJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. -func NewFilteredXGBoostJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { - return cache.NewSharedIndexInformer( - &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().XGBoostJobs(namespace).List(context.TODO(), options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().XGBoostJobs(namespace).Watch(context.TODO(), options) - }, - }, - &kubefloworgv1.XGBoostJob{}, - resyncPeriod, - indexers, - ) -} - -func (f *xGBoostJobInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { - return NewFilteredXGBoostJobInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) -} - -func (f *xGBoostJobInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&kubefloworgv1.XGBoostJob{}, f.defaultInformer) -} - -func (f *xGBoostJobInformer) Lister() v1.XGBoostJobLister { - return v1.NewXGBoostJobLister(f.Informer().GetIndexer()) -} diff --git a/pkg/client/listers/kubeflow.org/v1/expansion_generated.go 
b/pkg/client/listers/kubeflow.org/v1/expansion_generated.go deleted file mode 100644 index 24a842ad5f..0000000000 --- a/pkg/client/listers/kubeflow.org/v1/expansion_generated.go +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by lister-gen. DO NOT EDIT. - -package v1 - -// JAXJobListerExpansion allows custom methods to be added to -// JAXJobLister. -type JAXJobListerExpansion interface{} - -// JAXJobNamespaceListerExpansion allows custom methods to be added to -// JAXJobNamespaceLister. -type JAXJobNamespaceListerExpansion interface{} - -// MPIJobListerExpansion allows custom methods to be added to -// MPIJobLister. -type MPIJobListerExpansion interface{} - -// MPIJobNamespaceListerExpansion allows custom methods to be added to -// MPIJobNamespaceLister. -type MPIJobNamespaceListerExpansion interface{} - -// PaddleJobListerExpansion allows custom methods to be added to -// PaddleJobLister. -type PaddleJobListerExpansion interface{} - -// PaddleJobNamespaceListerExpansion allows custom methods to be added to -// PaddleJobNamespaceLister. -type PaddleJobNamespaceListerExpansion interface{} - -// PyTorchJobListerExpansion allows custom methods to be added to -// PyTorchJobLister. -type PyTorchJobListerExpansion interface{} - -// PyTorchJobNamespaceListerExpansion allows custom methods to be added to -// PyTorchJobNamespaceLister. 
-type PyTorchJobNamespaceListerExpansion interface{} - -// TFJobListerExpansion allows custom methods to be added to -// TFJobLister. -type TFJobListerExpansion interface{} - -// TFJobNamespaceListerExpansion allows custom methods to be added to -// TFJobNamespaceLister. -type TFJobNamespaceListerExpansion interface{} - -// XGBoostJobListerExpansion allows custom methods to be added to -// XGBoostJobLister. -type XGBoostJobListerExpansion interface{} - -// XGBoostJobNamespaceListerExpansion allows custom methods to be added to -// XGBoostJobNamespaceLister. -type XGBoostJobNamespaceListerExpansion interface{} diff --git a/pkg/client/listers/kubeflow.org/v1/jaxjob.go b/pkg/client/listers/kubeflow.org/v1/jaxjob.go deleted file mode 100644 index 74f318f018..0000000000 --- a/pkg/client/listers/kubeflow.org/v1/jaxjob.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by lister-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/listers" - "k8s.io/client-go/tools/cache" -) - -// JAXJobLister helps list JAXJobs. -// All objects returned here must be treated as read-only. -type JAXJobLister interface { - // List lists all JAXJobs in the indexer. - // Objects returned here must be treated as read-only. 
- List(selector labels.Selector) (ret []*v1.JAXJob, err error) - // JAXJobs returns an object that can list and get JAXJobs. - JAXJobs(namespace string) JAXJobNamespaceLister - JAXJobListerExpansion -} - -// jAXJobLister implements the JAXJobLister interface. -type jAXJobLister struct { - listers.ResourceIndexer[*v1.JAXJob] -} - -// NewJAXJobLister returns a new JAXJobLister. -func NewJAXJobLister(indexer cache.Indexer) JAXJobLister { - return &jAXJobLister{listers.New[*v1.JAXJob](indexer, v1.Resource("jaxjob"))} -} - -// JAXJobs returns an object that can list and get JAXJobs. -func (s *jAXJobLister) JAXJobs(namespace string) JAXJobNamespaceLister { - return jAXJobNamespaceLister{listers.NewNamespaced[*v1.JAXJob](s.ResourceIndexer, namespace)} -} - -// JAXJobNamespaceLister helps list and get JAXJobs. -// All objects returned here must be treated as read-only. -type JAXJobNamespaceLister interface { - // List lists all JAXJobs in the indexer for a given namespace. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.JAXJob, err error) - // Get retrieves the JAXJob from the indexer for a given namespace and name. - // Objects returned here must be treated as read-only. - Get(name string) (*v1.JAXJob, error) - JAXJobNamespaceListerExpansion -} - -// jAXJobNamespaceLister implements the JAXJobNamespaceLister -// interface. -type jAXJobNamespaceLister struct { - listers.ResourceIndexer[*v1.JAXJob] -} diff --git a/pkg/client/listers/kubeflow.org/v1/mpijob.go b/pkg/client/listers/kubeflow.org/v1/mpijob.go deleted file mode 100644 index 9540d47439..0000000000 --- a/pkg/client/listers/kubeflow.org/v1/mpijob.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by lister-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/listers" - "k8s.io/client-go/tools/cache" -) - -// MPIJobLister helps list MPIJobs. -// All objects returned here must be treated as read-only. -type MPIJobLister interface { - // List lists all MPIJobs in the indexer. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.MPIJob, err error) - // MPIJobs returns an object that can list and get MPIJobs. - MPIJobs(namespace string) MPIJobNamespaceLister - MPIJobListerExpansion -} - -// mPIJobLister implements the MPIJobLister interface. -type mPIJobLister struct { - listers.ResourceIndexer[*v1.MPIJob] -} - -// NewMPIJobLister returns a new MPIJobLister. -func NewMPIJobLister(indexer cache.Indexer) MPIJobLister { - return &mPIJobLister{listers.New[*v1.MPIJob](indexer, v1.Resource("mpijob"))} -} - -// MPIJobs returns an object that can list and get MPIJobs. -func (s *mPIJobLister) MPIJobs(namespace string) MPIJobNamespaceLister { - return mPIJobNamespaceLister{listers.NewNamespaced[*v1.MPIJob](s.ResourceIndexer, namespace)} -} - -// MPIJobNamespaceLister helps list and get MPIJobs. -// All objects returned here must be treated as read-only. -type MPIJobNamespaceLister interface { - // List lists all MPIJobs in the indexer for a given namespace. - // Objects returned here must be treated as read-only. 
- List(selector labels.Selector) (ret []*v1.MPIJob, err error) - // Get retrieves the MPIJob from the indexer for a given namespace and name. - // Objects returned here must be treated as read-only. - Get(name string) (*v1.MPIJob, error) - MPIJobNamespaceListerExpansion -} - -// mPIJobNamespaceLister implements the MPIJobNamespaceLister -// interface. -type mPIJobNamespaceLister struct { - listers.ResourceIndexer[*v1.MPIJob] -} diff --git a/pkg/client/listers/kubeflow.org/v1/paddlejob.go b/pkg/client/listers/kubeflow.org/v1/paddlejob.go deleted file mode 100644 index 1897a9a63c..0000000000 --- a/pkg/client/listers/kubeflow.org/v1/paddlejob.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by lister-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/listers" - "k8s.io/client-go/tools/cache" -) - -// PaddleJobLister helps list PaddleJobs. -// All objects returned here must be treated as read-only. -type PaddleJobLister interface { - // List lists all PaddleJobs in the indexer. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.PaddleJob, err error) - // PaddleJobs returns an object that can list and get PaddleJobs. 
- PaddleJobs(namespace string) PaddleJobNamespaceLister - PaddleJobListerExpansion -} - -// paddleJobLister implements the PaddleJobLister interface. -type paddleJobLister struct { - listers.ResourceIndexer[*v1.PaddleJob] -} - -// NewPaddleJobLister returns a new PaddleJobLister. -func NewPaddleJobLister(indexer cache.Indexer) PaddleJobLister { - return &paddleJobLister{listers.New[*v1.PaddleJob](indexer, v1.Resource("paddlejob"))} -} - -// PaddleJobs returns an object that can list and get PaddleJobs. -func (s *paddleJobLister) PaddleJobs(namespace string) PaddleJobNamespaceLister { - return paddleJobNamespaceLister{listers.NewNamespaced[*v1.PaddleJob](s.ResourceIndexer, namespace)} -} - -// PaddleJobNamespaceLister helps list and get PaddleJobs. -// All objects returned here must be treated as read-only. -type PaddleJobNamespaceLister interface { - // List lists all PaddleJobs in the indexer for a given namespace. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.PaddleJob, err error) - // Get retrieves the PaddleJob from the indexer for a given namespace and name. - // Objects returned here must be treated as read-only. - Get(name string) (*v1.PaddleJob, error) - PaddleJobNamespaceListerExpansion -} - -// paddleJobNamespaceLister implements the PaddleJobNamespaceLister -// interface. -type paddleJobNamespaceLister struct { - listers.ResourceIndexer[*v1.PaddleJob] -} diff --git a/pkg/client/listers/kubeflow.org/v1/pytorchjob.go b/pkg/client/listers/kubeflow.org/v1/pytorchjob.go deleted file mode 100644 index 953338197a..0000000000 --- a/pkg/client/listers/kubeflow.org/v1/pytorchjob.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by lister-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/listers" - "k8s.io/client-go/tools/cache" -) - -// PyTorchJobLister helps list PyTorchJobs. -// All objects returned here must be treated as read-only. -type PyTorchJobLister interface { - // List lists all PyTorchJobs in the indexer. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.PyTorchJob, err error) - // PyTorchJobs returns an object that can list and get PyTorchJobs. - PyTorchJobs(namespace string) PyTorchJobNamespaceLister - PyTorchJobListerExpansion -} - -// pyTorchJobLister implements the PyTorchJobLister interface. -type pyTorchJobLister struct { - listers.ResourceIndexer[*v1.PyTorchJob] -} - -// NewPyTorchJobLister returns a new PyTorchJobLister. -func NewPyTorchJobLister(indexer cache.Indexer) PyTorchJobLister { - return &pyTorchJobLister{listers.New[*v1.PyTorchJob](indexer, v1.Resource("pytorchjob"))} -} - -// PyTorchJobs returns an object that can list and get PyTorchJobs. -func (s *pyTorchJobLister) PyTorchJobs(namespace string) PyTorchJobNamespaceLister { - return pyTorchJobNamespaceLister{listers.NewNamespaced[*v1.PyTorchJob](s.ResourceIndexer, namespace)} -} - -// PyTorchJobNamespaceLister helps list and get PyTorchJobs. -// All objects returned here must be treated as read-only. 
-type PyTorchJobNamespaceLister interface { - // List lists all PyTorchJobs in the indexer for a given namespace. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.PyTorchJob, err error) - // Get retrieves the PyTorchJob from the indexer for a given namespace and name. - // Objects returned here must be treated as read-only. - Get(name string) (*v1.PyTorchJob, error) - PyTorchJobNamespaceListerExpansion -} - -// pyTorchJobNamespaceLister implements the PyTorchJobNamespaceLister -// interface. -type pyTorchJobNamespaceLister struct { - listers.ResourceIndexer[*v1.PyTorchJob] -} diff --git a/pkg/client/listers/kubeflow.org/v1/tfjob.go b/pkg/client/listers/kubeflow.org/v1/tfjob.go deleted file mode 100644 index 92e0524f63..0000000000 --- a/pkg/client/listers/kubeflow.org/v1/tfjob.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by lister-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/listers" - "k8s.io/client-go/tools/cache" -) - -// TFJobLister helps list TFJobs. -// All objects returned here must be treated as read-only. -type TFJobLister interface { - // List lists all TFJobs in the indexer. - // Objects returned here must be treated as read-only. 
- List(selector labels.Selector) (ret []*v1.TFJob, err error) - // TFJobs returns an object that can list and get TFJobs. - TFJobs(namespace string) TFJobNamespaceLister - TFJobListerExpansion -} - -// tFJobLister implements the TFJobLister interface. -type tFJobLister struct { - listers.ResourceIndexer[*v1.TFJob] -} - -// NewTFJobLister returns a new TFJobLister. -func NewTFJobLister(indexer cache.Indexer) TFJobLister { - return &tFJobLister{listers.New[*v1.TFJob](indexer, v1.Resource("tfjob"))} -} - -// TFJobs returns an object that can list and get TFJobs. -func (s *tFJobLister) TFJobs(namespace string) TFJobNamespaceLister { - return tFJobNamespaceLister{listers.NewNamespaced[*v1.TFJob](s.ResourceIndexer, namespace)} -} - -// TFJobNamespaceLister helps list and get TFJobs. -// All objects returned here must be treated as read-only. -type TFJobNamespaceLister interface { - // List lists all TFJobs in the indexer for a given namespace. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.TFJob, err error) - // Get retrieves the TFJob from the indexer for a given namespace and name. - // Objects returned here must be treated as read-only. - Get(name string) (*v1.TFJob, error) - TFJobNamespaceListerExpansion -} - -// tFJobNamespaceLister implements the TFJobNamespaceLister -// interface. -type tFJobNamespaceLister struct { - listers.ResourceIndexer[*v1.TFJob] -} diff --git a/pkg/client/listers/kubeflow.org/v1/xgboostjob.go b/pkg/client/listers/kubeflow.org/v1/xgboostjob.go deleted file mode 100644 index d4794c98ba..0000000000 --- a/pkg/client/listers/kubeflow.org/v1/xgboostjob.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by lister-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/listers" - "k8s.io/client-go/tools/cache" -) - -// XGBoostJobLister helps list XGBoostJobs. -// All objects returned here must be treated as read-only. -type XGBoostJobLister interface { - // List lists all XGBoostJobs in the indexer. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.XGBoostJob, err error) - // XGBoostJobs returns an object that can list and get XGBoostJobs. - XGBoostJobs(namespace string) XGBoostJobNamespaceLister - XGBoostJobListerExpansion -} - -// xGBoostJobLister implements the XGBoostJobLister interface. -type xGBoostJobLister struct { - listers.ResourceIndexer[*v1.XGBoostJob] -} - -// NewXGBoostJobLister returns a new XGBoostJobLister. -func NewXGBoostJobLister(indexer cache.Indexer) XGBoostJobLister { - return &xGBoostJobLister{listers.New[*v1.XGBoostJob](indexer, v1.Resource("xgboostjob"))} -} - -// XGBoostJobs returns an object that can list and get XGBoostJobs. -func (s *xGBoostJobLister) XGBoostJobs(namespace string) XGBoostJobNamespaceLister { - return xGBoostJobNamespaceLister{listers.NewNamespaced[*v1.XGBoostJob](s.ResourceIndexer, namespace)} -} - -// XGBoostJobNamespaceLister helps list and get XGBoostJobs. -// All objects returned here must be treated as read-only. 
-type XGBoostJobNamespaceLister interface { - // List lists all XGBoostJobs in the indexer for a given namespace. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.XGBoostJob, err error) - // Get retrieves the XGBoostJob from the indexer for a given namespace and name. - // Objects returned here must be treated as read-only. - Get(name string) (*v1.XGBoostJob, error) - XGBoostJobNamespaceListerExpansion -} - -// xGBoostJobNamespaceLister implements the XGBoostJobNamespaceLister -// interface. -type xGBoostJobNamespaceLister struct { - listers.ResourceIndexer[*v1.XGBoostJob] -} diff --git a/pkg/common/interface.go b/pkg/common/interface.go deleted file mode 100644 index e9ea457ecc..0000000000 --- a/pkg/common/interface.go +++ /dev/null @@ -1,94 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package common - -import ( - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime/schema" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// ControllerInterface defines the Interface to be implemented by custom operators. e.g. 
tf-operator needs to implement this interface -type ControllerInterface interface { - // Returns the Controller name - ControllerName() string - - // Returns the GroupVersionKind of the API - GetAPIGroupVersionKind() schema.GroupVersionKind - - // Returns the GroupVersion of the API - GetAPIGroupVersion() schema.GroupVersion - - // Returns the Group Name(value) in the labels of the job - GetGroupNameLabelValue() string - - // Returns the Job from Informer Cache - GetJobFromInformerCache(namespace, name string) (metav1.Object, error) - - // Returns the Job from API server - GetJobFromAPIClient(namespace, name string) (metav1.Object, error) - - // GetPodsForJob returns the pods managed by the job. This can be achieved by selecting pods using label key "job-name" - // i.e. all pods created by the job will come with label "job-name" = - GetPodsForJob(job interface{}) ([]*v1.Pod, error) - - // GetServicesForJob returns the services managed by the job. This can be achieved by selecting services using label key "job-name" - // i.e. all services created by the job will come with label "job-name" = - GetServicesForJob(job interface{}) ([]*v1.Service, error) - - // DeleteJob deletes the job - DeleteJob(job interface{}) error - - // UpdateJobStatus updates the job status and job conditions - UpdateJobStatus(job interface{}, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, jobStatus *apiv1.JobStatus) error - - // UpdateJobStatusInApiServer updates the job status in API server - UpdateJobStatusInApiServer(job interface{}, jobStatus *apiv1.JobStatus) error - - // SetClusterSpec sets the cluster spec for the pod - SetClusterSpec(job interface{}, podTemplate *v1.PodTemplateSpec, rtype, index string) error - - // Returns the default container name in pod - GetDefaultContainerName() string - - // Get the default container port name - GetDefaultContainerPortName() string - - // Returns if this replica type with index specified is a master role. 
- // MasterRole pod will have "job-role=master" set in its label - IsMasterRole(replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, rtype apiv1.ReplicaType, index int) bool - - // ReconcileJobs checks and updates replicas for each given ReplicaSpec of a job. - // Common implementation will be provided and User can still override this to implement their own reconcile logic - ReconcileJobs(job interface{}, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, jobStatus apiv1.JobStatus, runPolicy *apiv1.RunPolicy) error - - // ReconcilePods checks and updates pods for each given ReplicaSpec. - // It will requeue the job in case of an error while creating/deleting pods. - // Common implementation will be provided and User can still override this to implement their own reconcile logic - ReconcilePods(job interface{}, jobStatus *apiv1.JobStatus, pods []*v1.Pod, rtype apiv1.ReplicaType, spec *apiv1.ReplicaSpec, - replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) error - - // ReconcileServices checks and updates services for each given ReplicaSpec. - // It will requeue the job in case of an error while creating/deleting services. - // Common implementation will be provided and User can still override this to implement their own reconcile logic - ReconcileServices(job metav1.Object, services []*v1.Service, rtype apiv1.ReplicaType, spec *apiv1.ReplicaSpec) error - - // GetFrameworkName returns framework name (e.g., tensorflow). - GetFrameworkName() string -} diff --git a/pkg/common/metrics.go b/pkg/common/metrics.go deleted file mode 100644 index cbb015b9d8..0000000000 --- a/pkg/common/metrics.go +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -package common - -import ( - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "sigs.k8s.io/controller-runtime/pkg/metrics" -) - -// Define all the prometheus counters for all jobs -var ( - jobsCreatedCount = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: "training_operator_jobs_created_total", - Help: "Counts number of jobs created", - }, - []string{"job_namespace", "framework"}, - ) - jobsDeletedCount = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: "training_operator_jobs_deleted_total", - Help: "Counts number of jobs deleted", - }, - []string{"job_namespace", "framework"}, - ) - jobsSuccessfulCount = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: "training_operator_jobs_successful_total", - Help: "Counts number of jobs successful", - }, - []string{"job_namespace", "framework"}, - ) - jobsFailedCount = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: "training_operator_jobs_failed_total", - Help: "Counts number of jobs failed", - }, - []string{"job_namespace", "framework"}, - ) - jobsRestartedCount = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: "training_operator_jobs_restarted_total", - Help: "Counts number of jobs restarted", - }, - []string{"job_namespace", "framework"}, - ) -) - -func init() { - // Register custom metrics with the global prometheus registry - metrics.Registry.MustRegister(jobsCreatedCount, - jobsDeletedCount, - jobsSuccessfulCount, - jobsFailedCount, - jobsRestartedCount) -} - -func 
CreatedJobsCounterInc(job_namespace, framework string) { - jobsCreatedCount.WithLabelValues(job_namespace, framework).Inc() -} - -func DeletedJobsCounterInc(job_namespace, framework string) { - jobsDeletedCount.WithLabelValues(job_namespace, framework).Inc() -} - -func SuccessfulJobsCounterInc(job_namespace, framework string) { - jobsSuccessfulCount.WithLabelValues(job_namespace, framework).Inc() -} - -func FailedJobsCounterInc(job_namespace, framework string) { - jobsFailedCount.WithLabelValues(job_namespace, framework).Inc() -} - -func RestartedJobsCounterInc(job_namespace, framework string) { - jobsRestartedCount.WithLabelValues(job_namespace, framework).Inc() -} diff --git a/pkg/common/update.go b/pkg/common/update.go deleted file mode 100644 index 02503f4e31..0000000000 --- a/pkg/common/update.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -package common - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// ClearGeneratedFields will clear the generated fields from the given object meta. -// It is used to avoid problems like "the object has been modified; please apply your -// changes to the latest version and try again". 
-func ClearGeneratedFields(objmeta *metav1.ObjectMeta) { - objmeta.UID = "" - objmeta.CreationTimestamp = metav1.Time{} -} diff --git a/pkg/common/util/fake_workqueue.go b/pkg/common/util/fake_workqueue.go deleted file mode 100644 index 5102b1d571..0000000000 --- a/pkg/common/util/fake_workqueue.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -package util - -import "time" - -// FakeWorkQueue implements RateLimitingInterface but actually does nothing. 
-type FakeWorkQueue[T any] struct{} - -// Add WorkQueue Add method -func (f *FakeWorkQueue[T]) Add(item T) {} - -// Len WorkQueue Len method -func (f *FakeWorkQueue[T]) Len() int { return 0 } - -// Get WorkQueue Get method -func (f *FakeWorkQueue[T]) Get() (item T, shutdown bool) { - var empty T - return empty, false -} - -// Done WorkQueue Done method -func (f *FakeWorkQueue[T]) Done(item T) {} - -// ShutDown WorkQueue ShutDown method -func (f *FakeWorkQueue[T]) ShutDown() {} - -// ShutDownWithDrain WorkQueue ShutDownWithDrain method -func (f *FakeWorkQueue[T]) ShutDownWithDrain() {} - -// ShuttingDown WorkQueue ShuttingDown method -func (f *FakeWorkQueue[T]) ShuttingDown() bool { return true } - -// AddAfter WorkQueue AddAfter method -func (f *FakeWorkQueue[T]) AddAfter(item T, duration time.Duration) {} - -// AddRateLimited WorkQueue AddRateLimited method -func (f *FakeWorkQueue[T]) AddRateLimited(item T) {} - -// Forget WorkQueue Forget method -func (f *FakeWorkQueue[T]) Forget(item T) {} - -// NumRequeues WorkQueue NumRequeues method -func (f *FakeWorkQueue[T]) NumRequeues(item T) int { return 0 } diff --git a/pkg/common/util/reconciler.go b/pkg/common/util/reconciler.go deleted file mode 100644 index b043b9cd80..0000000000 --- a/pkg/common/util/reconciler.go +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -package util - -import ( - "fmt" - "reflect" - "strings" - - log "github.com/sirupsen/logrus" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/predicate" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" -) - -// GenExpectationGenericKey generates an expectation key for {Kind} of a job -func GenExpectationGenericKey(jobKey string, replicaType string, pl string) string { - return jobKey + "/" + strings.ToLower(replicaType) + "/" + pl -} - -// LoggerForGenericKind generates log entry for generic Kubernetes resource Kind -func LoggerForGenericKind(obj metav1.Object, kind string) *log.Entry { - job := "" - if controllerRef := metav1.GetControllerOf(obj); controllerRef != nil { - if controllerRef.Kind == kind { - job = obj.GetNamespace() + "." + controllerRef.Name - } - } - return log.WithFields(log.Fields{ - // We use job to match the key used in controller.go - // In controller.go we log the key used with the workqueue. - "job": job, - kind: obj.GetNamespace() + "." 
+ obj.GetName(), - "uid": obj.GetUID(), - }) -} - -func objectKind(s *runtime.Scheme, obj client.Object) schema.GroupVersionKind { - gkvs, _, err := s.ObjectKinds(obj) - if err != nil { - var logger = LoggerForGenericKind(obj, "") - logger.Errorf("unknown kind for %v", obj) - return schema.GroupVersionKind{} - } - return gkvs[0] -} - -func OnDependentFuncs[T client.Object](s *runtime.Scheme, expectations expectation.ControllerExpectationsInterface, jobController *common.JobController) predicate.TypedFuncs[T] { - return predicate.TypedFuncs[T]{ - CreateFunc: OnDependentCreateFuncGeneric[T](s, expectations), - UpdateFunc: OnDependentUpdateFuncGeneric[T](s, jobController), - DeleteFunc: OnDependentDeleteFuncGeneric[T](s, expectations), - } -} - -// OnDependentCreateFuncGeneric modify expectations when dependent (pod/service) creation observed. -func OnDependentCreateFuncGeneric[T client.Object](s *runtime.Scheme, exp expectation.ControllerExpectationsInterface) func(createEvent event.TypedCreateEvent[T]) bool { - return func(e event.TypedCreateEvent[T]) bool { - rtype := e.Object.GetLabels()[kubeflowv1.ReplicaTypeLabel] - if len(rtype) == 0 { - return false - } - - if controllerRef := metav1.GetControllerOf(e.Object); controllerRef != nil { - jobKey := fmt.Sprintf("%s/%s", e.Object.GetNamespace(), controllerRef.Name) - kind := e.Object.GetObjectKind().GroupVersionKind().Kind - if kind == "" { - kind = objectKind(s, e.Object).Kind - } - pl := strings.ToLower(kind) + "s" - expectKey := GenExpectationGenericKey(jobKey, rtype, pl) - exp.CreationObserved(expectKey) - return true - } - - return true - } -} - -// OnDependentUpdateFuncGeneric modify expectations when dependent update observed. 
-func OnDependentUpdateFuncGeneric[T client.Object](_ *runtime.Scheme, jc *common.JobController) func(updateEvent event.TypedUpdateEvent[T]) bool { - return func(e event.TypedUpdateEvent[T]) bool { - newObj := e.ObjectNew - oldObj := e.ObjectOld - if newObj.GetResourceVersion() == oldObj.GetResourceVersion() { - // Periodic resync will send update events for all known pods. - // Two different versions of the same pod will always have different RVs. - return false - } - - kind := jc.Controller.GetAPIGroupVersionKind().Kind - var logger = LoggerForGenericKind(newObj, kind) - - newControllerRef := metav1.GetControllerOf(newObj) - oldControllerRef := metav1.GetControllerOf(oldObj) - controllerRefChanged := !reflect.DeepEqual(newControllerRef, oldControllerRef) - - if controllerRefChanged && oldControllerRef != nil { - // The ControllerRef was changed. Sync the old controller, if any. - if job := resolveControllerRef(jc, oldObj.GetNamespace(), oldControllerRef); job != nil { - logger.Infof("%s controller ref updated: %v, %v", kind, newObj, oldObj) - return true - } - } - - // If it has a controller ref, that's all that matters. - if newControllerRef != nil { - job := resolveControllerRef(jc, newObj.GetNamespace(), newControllerRef) - if job == nil { - return false - } - logger.Debugf("%s has a controller ref: %v, %v", kind, newObj, oldObj) - return true - } - return false - } -} - -// OnDependentDeleteFuncGeneric modify expectations when dependent deletion observed. 
-func OnDependentDeleteFuncGeneric[T client.Object](s *runtime.Scheme, exp expectation.ControllerExpectationsInterface) func(event.TypedDeleteEvent[T]) bool { - return func(e event.TypedDeleteEvent[T]) bool { - rtype := e.Object.GetLabels()[kubeflowv1.ReplicaTypeLabel] - if len(rtype) == 0 { - return false - } - - if controllerRef := metav1.GetControllerOf(e.Object); controllerRef != nil { - jobKey := fmt.Sprintf("%s/%s", e.Object.GetNamespace(), controllerRef.Name) - kind := e.Object.GetObjectKind().GroupVersionKind().Kind - if kind == "" { - kind = objectKind(s, e.Object).Kind - } - pl := strings.ToLower(kind) + "s" - expectKey := GenExpectationGenericKey(jobKey, rtype, pl) - exp.DeletionObserved(expectKey) - return true - } - - return true - } -} - -// SatisfiedExpectations returns true if the required adds/dels for the given job have been observed. -// Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller -// manager. -func SatisfiedExpectations(exp expectation.ControllerExpectationsInterface, jobKey string, replicaTypes []kubeflowv1.ReplicaType) bool { - satisfied := false - for _, rtype := range replicaTypes { - // Check the expectations of the pods. - expectationPodsKey := expectation.GenExpectationPodsKey(jobKey, string(rtype)) - satisfied = satisfied || exp.SatisfiedExpectations(expectationPodsKey) - // Check the expectations of the services. - expectationServicesKey := expectation.GenExpectationServicesKey(jobKey, string(rtype)) - satisfied = satisfied || exp.SatisfiedExpectations(expectationServicesKey) - } - - return satisfied -} - -// resolveControllerRef returns the job referenced by a ControllerRef, -// or nil if the ControllerRef could not be resolved to a matching job -// of the correct Kind. 
-func resolveControllerRef(jc *common.JobController, namespace string, controllerRef *metav1.OwnerReference) metav1.Object { - // We can't look up by UID, so look up by Name and then verify UID. - // Don't even try to look up by Name if it's the wrong Kind. - if controllerRef.Kind != jc.Controller.GetAPIGroupVersionKind().Kind { - return nil - } - job, err := jc.Controller.GetJobFromInformerCache(namespace, controllerRef.Name) - if err != nil { - return nil - } - if job.GetUID() != controllerRef.UID { - // The controller we found with this Name is not the same one that the - // ControllerRef points to. - return nil - } - return job -} diff --git a/pkg/common/util/scheduler.go b/pkg/common/util/scheduler.go deleted file mode 100644 index a7bbff1d82..0000000000 --- a/pkg/common/util/scheduler.go +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -package util - -import kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - -func IsGangSchedulerSet(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, schedulerName string) bool { - for _, spec := range replicas { - if spec.Template.Spec.SchedulerName != "" && spec.Template.Spec.SchedulerName == schedulerName { - return true - } - } - return false -} - -func GetSchedulerName(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec) string { - for _, spec := range replicas { - if len(spec.Template.Spec.SchedulerName) > 0 { - return spec.Template.Spec.SchedulerName - } - } - return "" -} diff --git a/pkg/common/util/util.go b/pkg/common/util/util.go deleted file mode 100644 index e0aa438e48..0000000000 --- a/pkg/common/util/util.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -package util - -import ( - "fmt" - "time" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - commonutil "github.com/kubeflow/training-operator/pkg/util" -) - -type ObjectFilterFunction func(obj metav1.Object) bool - -// ConvertServiceList convert service list to service point list -func ConvertServiceList(list []corev1.Service) []*corev1.Service { - if list == nil { - return nil - } - ret := make([]*corev1.Service, 0, len(list)) - for i := range list { - ret = append(ret, &list[i]) - } - return ret -} - -// JobControlledPodList filter pod list owned by the job. -func JobControlledPodList(list []corev1.Pod, job metav1.Object) []*corev1.Pod { - if list == nil { - return nil - } - ret := make([]*corev1.Pod, 0, len(list)) - for i := range list { - if !metav1.IsControlledBy(&list[i], job) { - continue - } - ret = append(ret, &list[i]) - } - return ret -} - -func GetReplicaTypes(specs map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec) []kubeflowv1.ReplicaType { - keys := make([]kubeflowv1.ReplicaType, 0, len(specs)) - for k := range specs { - keys = append(keys, k) - } - return keys -} - -// DurationUntilExpireTime returns the duration until job needs to be cleaned up, or -1 if it's infinite. 
-func DurationUntilExpireTime(runPolicy *kubeflowv1.RunPolicy, jobStatus kubeflowv1.JobStatus) (time.Duration, error) { - if !commonutil.IsSucceeded(jobStatus) && !commonutil.IsFailed(jobStatus) { - return -1, nil - } - currentTime := time.Now() - ttl := runPolicy.TTLSecondsAfterFinished - if ttl == nil { - return -1, nil - } - duration := time.Second * time.Duration(*ttl) - if jobStatus.CompletionTime == nil { - return -1, fmt.Errorf("job completion time is nil, cannot cleanup") - } - finishTime := jobStatus.CompletionTime - expireTime := finishTime.Add(duration) - if currentTime.After(expireTime) { - return 0, nil - } else { - return expireTime.Sub(currentTime), nil - } -} diff --git a/pkg/common/util/util_test.go b/pkg/common/util/util_test.go deleted file mode 100644 index 67509d3f10..0000000000 --- a/pkg/common/util/util_test.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2022 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package util - -import ( - "testing" - "time" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/ptr" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestDurationUntilExpireTime(t *testing.T) { - tests := []struct { - name string - runPolicy *kubeflowv1.RunPolicy - jobStatus kubeflowv1.JobStatus - want time.Duration - wantErr bool - }{ - { - name: "running job", - runPolicy: &kubeflowv1.RunPolicy{}, - jobStatus: kubeflowv1.JobStatus{ - Conditions: []kubeflowv1.JobCondition{newJobCondition(kubeflowv1.JobRunning)}, - }, - want: -1, - wantErr: false, - }, - { - name: "succeeded job with remaining time 1s", - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: ptr.To[int32](5), - }, - jobStatus: kubeflowv1.JobStatus{ - Conditions: []kubeflowv1.JobCondition{newJobCondition(kubeflowv1.JobSucceeded)}, - CompletionTime: &metav1.Time{Time: time.Now().Add(4 * time.Second)}, - }, - want: 1, - wantErr: false, - }, - { - name: "failed job with remaining time 1s", - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: ptr.To[int32](5), - }, - jobStatus: kubeflowv1.JobStatus{ - Conditions: []kubeflowv1.JobCondition{newJobCondition(kubeflowv1.JobFailed)}, - CompletionTime: &metav1.Time{Time: time.Now().Add(4 * time.Second)}, - }, - want: 1, - wantErr: false, - }, - { - name: "succeeded job with infinite TTL", - runPolicy: &kubeflowv1.RunPolicy{}, - jobStatus: kubeflowv1.JobStatus{ - Conditions: []kubeflowv1.JobCondition{newJobCondition(kubeflowv1.JobSucceeded)}, - CompletionTime: &metav1.Time{Time: time.Now().Add(4 * time.Second)}, - }, - want: -1, - wantErr: false, - }, - { - name: "succeeded job without remaining time", - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: ptr.To[int32](5), - }, - jobStatus: kubeflowv1.JobStatus{ - Conditions: []kubeflowv1.JobCondition{newJobCondition(kubeflowv1.JobSucceeded)}, - CompletionTime: &metav1.Time{Time: time.Now().Add(6 
* time.Second)}, - }, - want: 0, - wantErr: false, - }, - { - name: "succeeded job with nil completion time error", - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: ptr.To[int32](5), - }, - jobStatus: kubeflowv1.JobStatus{ - Conditions: []kubeflowv1.JobCondition{newJobCondition(kubeflowv1.JobSucceeded)}, - }, - want: -1, - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := DurationUntilExpireTime(tt.runPolicy, tt.jobStatus) - if (err != nil) != tt.wantErr { - t.Errorf("DurationUntilExpireTime() error = %v, wantErr %v", err, tt.wantErr) - return - } - if got != tt.want { - if tt.want < 0 || tt.want >= 0 && tt.want > got { - t.Errorf("DurationUntilExpireTime() got = %v, want %v", got, tt.want) - } - } - }) - } -} - -func newJobCondition(t kubeflowv1.JobConditionType) kubeflowv1.JobCondition { - return kubeflowv1.JobCondition{ - Type: t, - Status: corev1.ConditionTrue, - } -} diff --git a/pkg/common/util/webhooks.go b/pkg/common/util/webhooks.go deleted file mode 100644 index aa4031cffe..0000000000 --- a/pkg/common/util/webhooks.go +++ /dev/null @@ -1,32 +0,0 @@ -package util - -import ( - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - - apivalidation "k8s.io/apimachinery/pkg/api/validation" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/apimachinery/pkg/util/validation/field" -) - -var supportedJobControllers = sets.New( - v1.MultiKueueController, - v1.KubeflowJobsController) - -func ValidateRunPolicy(runPolicy *v1.RunPolicy) field.ErrorList { - errs := field.ErrorList{} - if runPolicy.ManagedBy != nil { - manager := *runPolicy.ManagedBy - if !supportedJobControllers.Has(manager) { - fieldPath := field.NewPath("spec", "runPolicy", "managedBy") - errs = append(errs, field.NotSupported(fieldPath, manager, supportedJobControllers.UnsortedList())) - } - } - return errs -} - -func ValidateRunPolicyUpdate(oldRunPolicy, newRunPolicy *v1.RunPolicy) field.ErrorList { - oldManager := 
oldRunPolicy.ManagedBy - newManager := newRunPolicy.ManagedBy - fieldPath := field.NewPath("spec", "runPolicy", "managedBy") - return apivalidation.ValidateImmutableField(newManager, oldManager, fieldPath) -} diff --git a/pkg/config/config.go b/pkg/config/config.go deleted file mode 100644 index 68461998f2..0000000000 --- a/pkg/config/config.go +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -package config - -// Config is the global configuration for the training operator. -var Config struct { - PyTorchInitContainerTemplateFile string - PyTorchInitContainerImage string - MPIKubectlDeliveryImage string - PyTorchInitContainerMaxTries int -} - -const ( - // PyTorchInitContainerImageDefault is the default image for the pytorch - // init container. - PyTorchInitContainerImageDefault = "alpine:3.10" - // PyTorchInitContainerTemplateFileDefault is the default template file for - // the pytorch init container. - PyTorchInitContainerTemplateFileDefault = "/etc/config/initContainer.yaml" - // PyTorchInitContainerMaxTriesDefault is the default number of tries for the pytorch init container. - PyTorchInitContainerMaxTriesDefault = 100 - // MPIKubectlDeliveryImageDefault is the default image for launcher pod in MPIJob init container. 
- MPIKubectlDeliveryImageDefault = "kubeflow/kubectl-delivery:latest" -) diff --git a/pkg/controller.v1/common/job.go b/pkg/controller.v1/common/job.go deleted file mode 100644 index ee750c2d1d..0000000000 --- a/pkg/controller.v1/common/job.go +++ /dev/null @@ -1,464 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package common - -import ( - "fmt" - "reflect" - "time" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - "github.com/kubeflow/training-operator/pkg/core" - commonutil "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/k8sutil" - trainutil "github.com/kubeflow/training-operator/pkg/util/train" - - log "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/klog/v2" - schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - volcanov1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" -) - -// DeletePodsAndServices deletes pods and services considering cleanPodPolicy. -// However, if the job doesn't have Succeeded or Failed condition, it ignores cleanPodPolicy. 
-func (jc *JobController) DeletePodsAndServices(runtimeObject runtime.Object, runPolicy *apiv1.RunPolicy, jobStatus apiv1.JobStatus, pods []*corev1.Pod) error { - if len(pods) == 0 { - return nil - } - - // Delete nothing when the cleanPodPolicy is None and the job has Succeeded or Failed condition. - if commonutil.IsFinished(jobStatus) && *runPolicy.CleanPodPolicy == apiv1.CleanPodPolicyNone { - return nil - } - - for _, pod := range pods { - // Note that pending pod will turn into running once schedulable, - // not cleaning it may leave orphan running pod in the future, - // we should treat it equivalent to running phase here. - if commonutil.IsFinished(jobStatus) && *runPolicy.CleanPodPolicy == apiv1.CleanPodPolicyRunning && pod.Status.Phase != corev1.PodRunning && pod.Status.Phase != corev1.PodPending { - continue - } - if err := jc.PodControl.DeletePod(pod.Namespace, pod.Name, runtimeObject); err != nil { - return err - } - // Pod and service have the same name, thus the service could be deleted using pod's name. - if err := jc.ServiceControl.DeleteService(pod.Namespace, pod.Name, runtimeObject); err != nil { - return err - } - } - return nil -} - -// recordAbnormalPods records the active pod whose latest condition is not in True status. -func (jc *JobController) recordAbnormalPods(activePods []*corev1.Pod, object runtime.Object) { - core.RecordAbnormalPods(activePods, object, jc.Recorder) -} - -// ReconcileJobs checks and updates replicas for each given ReplicaSpec. -// It will requeue the job in case of an error while creating/deleting pods/services. 
-func (jc *JobController) ReconcileJobs( - job interface{}, - replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, - jobStatus apiv1.JobStatus, - runPolicy *apiv1.RunPolicy) error { - - metaObject, ok := job.(metav1.Object) - jobName := metaObject.GetName() - if !ok { - return fmt.Errorf("job is not of type metav1.Object") - } - runtimeObject, ok := job.(runtime.Object) - if !ok { - return fmt.Errorf("job is not of type runtime.Object") - } - jobKey, err := KeyFunc(job) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get key for job object %#v: %v", job, err)) - return err - } - jobKind := jc.Controller.GetAPIGroupVersionKind().Kind - // Reset expectations - // 1. Since `ReconcileJobs` is called, we expect that previous expectations are all satisfied, - // and it's safe to reset the expectations - // 2. Reset expectations can avoid dirty data such as `expectedDeletion = -1` - // (pod or service was deleted unexpectedly) - if err = jc.ResetExpectations(jobKey, replicas); err != nil { - log.Warnf("Failed to reset expectations: %v", err) - } - - log.Infof("Reconciling for job %s", metaObject.GetName()) - pods, err := jc.Controller.GetPodsForJob(job) - if err != nil { - log.Warnf("GetPodsForJob error %v", err) - return err - } - - services, err := jc.Controller.GetServicesForJob(job) - if err != nil { - log.Warnf("GetServicesForJob error %v", err) - return err - } - - oldStatus := jobStatus.DeepCopy() - if commonutil.IsFinished(jobStatus) { - // If the Job is succeeded or failed, delete all pods, services, and podGroup. - if err = jc.CleanUpResources(runPolicy, runtimeObject, metaObject, jobStatus, pods); err != nil { - return err - } - - // At this point the pods may have been deleted. - // 1) If the job succeeded, we manually set the replica status. - // 2) If any replicas are still active, set their status to 'succeeded'. 
- if commonutil.IsSucceeded(jobStatus) { - for rtype := range jobStatus.ReplicaStatuses { - jobStatus.ReplicaStatuses[rtype].Succeeded += jobStatus.ReplicaStatuses[rtype].Active - jobStatus.ReplicaStatuses[rtype].Active = 0 - } - } - - // No need to update the job status if the status hasn't changed since last time. - if !reflect.DeepEqual(*oldStatus, jobStatus) { - return jc.Controller.UpdateJobStatusInApiServer(job, &jobStatus) - } - - return nil - } - - if trainutil.IsJobSuspended(runPolicy) { - if err = jc.CleanUpResources(runPolicy, runtimeObject, metaObject, jobStatus, pods); err != nil { - return err - } - for rType := range jobStatus.ReplicaStatuses { - jobStatus.ReplicaStatuses[rType].Active = 0 - } - msg := fmt.Sprintf("%s %s is suspended.", jobKind, jobName) - if commonutil.IsRunning(jobStatus) { - commonutil.UpdateJobConditions(&jobStatus, apiv1.JobRunning, corev1.ConditionFalse, commonutil.NewReason(jobKind, commonutil.JobSuspendedReason), msg) - } - // We add the suspended condition to the job only when the job doesn't have a suspended condition. 
- if !commonutil.IsSuspended(jobStatus) { - commonutil.UpdateJobConditions(&jobStatus, apiv1.JobSuspended, corev1.ConditionTrue, commonutil.NewReason(jobKind, commonutil.JobSuspendedReason), msg) - } - jc.Recorder.Event(runtimeObject, corev1.EventTypeNormal, commonutil.NewReason(jobKind, commonutil.JobSuspendedReason), msg) - if !reflect.DeepEqual(*oldStatus, jobStatus) { - return jc.Controller.UpdateJobStatusInApiServer(job, &jobStatus) - } - return nil - } - if commonutil.IsSuspended(jobStatus) { - msg := fmt.Sprintf("%s %s is resumed.", jobKind, jobName) - commonutil.UpdateJobConditions(&jobStatus, apiv1.JobSuspended, corev1.ConditionFalse, commonutil.NewReason(jobKind, commonutil.JobResumedReason), msg) - now := metav1.Now() - jobStatus.StartTime = &now - jc.Recorder.Eventf(runtimeObject, corev1.EventTypeNormal, commonutil.NewReason(jobKind, commonutil.JobResumedReason), msg) - } - - // retrieve the previous number of retry - previousRetry := jc.WorkQueue.NumRequeues(jobKey) - - activePods := k8sutil.FilterActivePods(pods) - - jc.recordAbnormalPods(activePods, runtimeObject) - - active := int32(len(activePods)) - failed := k8sutil.FilterPodCount(pods, corev1.PodFailed) - totalReplicas := k8sutil.GetTotalReplicas(replicas) - prevReplicasFailedNum := k8sutil.GetTotalFailedReplicas(jobStatus.ReplicaStatuses) - - var failureMessage string - jobExceedsLimit := false - exceedsBackoffLimit := false - pastBackoffLimit := false - - if runPolicy.BackoffLimit != nil { - jobHasNewFailure := failed > prevReplicasFailedNum - // new failures happen when status does not reflect the failures and active - // is different from parallelism, otherwise the previous controller loop - // failed updating status so even if we pick up failure it is not a new one - exceedsBackoffLimit = jobHasNewFailure && (active != totalReplicas) && - (int32(previousRetry)+1 > *runPolicy.BackoffLimit) - - pastBackoffLimit, err = jc.PastBackoffLimit(jobName, runPolicy, replicas, pods) - if err != nil { - 
return err - } - } - - if exceedsBackoffLimit || pastBackoffLimit { - // check if the number of pod restart exceeds backoff (for restart OnFailure only) - // OR if the number of failed jobs increased since the last syncJob - jobExceedsLimit = true - failureMessage = fmt.Sprintf("Job %s has failed because it has reached the specified backoff limit", jobName) - } else if jc.PastActiveDeadline(runPolicy, jobStatus) { - failureMessage = fmt.Sprintf("Job %s has failed because it was active longer than specified deadline", jobName) - jobExceedsLimit = true - } - - if jobExceedsLimit { - // Set job completion time before resource cleanup - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - - // If the Job exceeds backoff limit or is past active deadline - // delete all pods and services, then set the status to failed - if err := jc.DeletePodsAndServices(runtimeObject, runPolicy, jobStatus, pods); err != nil { - return err - } - - if err := jc.CleanupJob(runPolicy, jobStatus, job); err != nil { - return err - } - - if jc.Config.EnableGangScheduling() { - jc.Recorder.Event(runtimeObject, corev1.EventTypeNormal, "JobTerminated", "Job has been terminated. 
Deleting PodGroup") - if err := jc.DeletePodGroup(metaObject); err != nil { - jc.Recorder.Eventf(runtimeObject, corev1.EventTypeWarning, "FailedDeletePodGroup", "Error deleting: %v", err) - return err - } else { - jc.Recorder.Eventf(runtimeObject, corev1.EventTypeNormal, "SuccessfulDeletePodGroup", "Deleted PodGroup: %v", jobName) - } - } - - jc.Recorder.Event(runtimeObject, corev1.EventTypeNormal, commonutil.NewReason(jobKind, commonutil.JobFailedReason), failureMessage) - - commonutil.UpdateJobConditions(&jobStatus, apiv1.JobFailed, corev1.ConditionTrue, commonutil.NewReason(jobKind, commonutil.JobFailedReason), failureMessage) - - return jc.Controller.UpdateJobStatusInApiServer(job, &jobStatus) - } else { - // General cases which need to reconcile - if jc.Config.EnableGangScheduling() { - minMember := totalReplicas - queue := "default" - priorityClass := "" - var schedulerTimeout *int32 - var minResources *corev1.ResourceList - - if runPolicy.SchedulingPolicy != nil { - if minAvailable := runPolicy.SchedulingPolicy.MinAvailable; minAvailable != nil { - minMember = *minAvailable - } - if q := runPolicy.SchedulingPolicy.Queue; len(q) != 0 { - queue = q - } - if pc := runPolicy.SchedulingPolicy.PriorityClass; len(pc) != 0 { - priorityClass = pc - } - if mr := runPolicy.SchedulingPolicy.MinResources; mr != nil { - minResources = (*corev1.ResourceList)(mr) - } - if timeout := runPolicy.SchedulingPolicy.ScheduleTimeoutSeconds; timeout != nil { - schedulerTimeout = timeout - } - } - - if minResources == nil { - minResources = jc.calcPGMinResources(minMember, replicas) - } - - var pgSpecFill FillPodGroupSpecFunc - switch jc.Config.GangScheduling { - case GangSchedulerVolcano: - pgSpecFill = func(pg metav1.Object) error { - volcanoPodGroup, match := pg.(*volcanov1beta1.PodGroup) - if !match { - return fmt.Errorf("unable to recognize PodGroup: %v", klog.KObj(pg)) - } - - if q := volcanoPodGroup.Spec.Queue; len(q) > 0 { - queue = q - } - - volcanoPodGroup.Spec = 
volcanov1beta1.PodGroupSpec{ - MinMember: minMember, - Queue: queue, - PriorityClassName: priorityClass, - MinResources: minResources, - } - return nil - } - default: - pgSpecFill = func(pg metav1.Object) error { - schedulerPluginsPodGroup, match := pg.(*schedulerpluginsv1alpha1.PodGroup) - if !match { - return fmt.Errorf("unable to recognize PodGroup: %v", klog.KObj(pg)) - } - schedulerPluginsPodGroup.Spec = schedulerpluginsv1alpha1.PodGroupSpec{ - MinMember: minMember, - MinResources: *minResources, - ScheduleTimeoutSeconds: schedulerTimeout, - } - return nil - } - } - - syncReplicas := true - pg, err := jc.SyncPodGroup(metaObject, pgSpecFill) - if err != nil { - log.Warnf("Sync PodGroup %v: %v", jobKey, err) - syncReplicas = false - } - - // Delay pods creation until PodGroup status is Inqueue - if jc.PodGroupControl.DelayPodCreationDueToPodGroup(pg) { - log.Warnf("PodGroup %v unschedulable", jobKey) - syncReplicas = false - } - - if !syncReplicas { - now := metav1.Now() - jobStatus.LastReconcileTime = &now - - // Update job status here to trigger a new reconciliation - return jc.Controller.UpdateJobStatusInApiServer(job, &jobStatus) - } - } - - // Diff current active pods/services with replicas. - for rtype, spec := range replicas { - err := jc.Controller.ReconcilePods(metaObject, &jobStatus, pods, rtype, spec, replicas) - if err != nil { - log.Warnf("ReconcilePods error %v", err) - return err - } - - err = jc.Controller.ReconcileServices(metaObject, services, rtype, spec) - - if err != nil { - log.Warnf("ReconcileServices error %v", err) - return err - } - } - } - - err = jc.Controller.UpdateJobStatus(job, replicas, &jobStatus) - if err != nil { - log.Warnf("UpdateJobStatus error %v", err) - return err - } - // No need to update the job status if the status hasn't changed since last time. 
- if !reflect.DeepEqual(*oldStatus, jobStatus) { - return jc.Controller.UpdateJobStatusInApiServer(job, &jobStatus) - } - return nil -} - -func (jc *JobController) CleanUpResources( - runPolicy *apiv1.RunPolicy, - runtimeObject runtime.Object, - metaObject metav1.Object, - jobStatus apiv1.JobStatus, - pods []*corev1.Pod, -) error { - if err := jc.DeletePodsAndServices(runtimeObject, runPolicy, jobStatus, pods); err != nil { - return err - } - if jc.Config.EnableGangScheduling() { - - jc.Recorder.Event(runtimeObject, corev1.EventTypeNormal, "JobTerminated", "Job has been terminated. Deleting PodGroup") - if err := jc.DeletePodGroup(metaObject); err != nil { - jc.Recorder.Eventf(runtimeObject, corev1.EventTypeWarning, "FailedDeletePodGroup", "Error deleting: %v", err) - return err - } else { - jc.Recorder.Eventf(runtimeObject, corev1.EventTypeNormal, "SuccessfulDeletePodGroup", "Deleted PodGroup: %v", metaObject.GetName()) - } - } - if err := jc.CleanupJob(runPolicy, jobStatus, runtimeObject); err != nil { - return err - } - return nil -} - -// ResetExpectations reset the expectation for creates and deletes of pod/service to zero. -func (jc *JobController) ResetExpectations(jobKey string, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) error { - var allErrs error - for rtype := range replicas { - expectationPodsKey := expectation.GenExpectationPodsKey(jobKey, string(rtype)) - if err := jc.Expectations.SetExpectations(expectationPodsKey, 0, 0); err != nil { - allErrs = err - } - expectationServicesKey := expectation.GenExpectationServicesKey(jobKey, string(rtype)) - if err := jc.Expectations.SetExpectations(expectationServicesKey, 0, 0); err != nil { - allErrs = fmt.Errorf("%s: %w", allErrs.Error(), err) - } - } - return allErrs -} - -// PastActiveDeadline checks if job has ActiveDeadlineSeconds field set and if it is exceeded. 
-func (jc *JobController) PastActiveDeadline(runPolicy *apiv1.RunPolicy, jobStatus apiv1.JobStatus) bool { - return core.PastActiveDeadline(runPolicy, jobStatus) -} - -// PastBackoffLimit checks if container restartCounts sum exceeds BackoffLimit -// this method applies only to pods when restartPolicy is one of OnFailure, Always or ExitCode -func (jc *JobController) PastBackoffLimit(jobName string, runPolicy *apiv1.RunPolicy, - replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, pods []*corev1.Pod) (bool, error) { - return core.PastBackoffLimit(jobName, runPolicy, replicas, pods, jc.FilterPodsForReplicaType) -} - -func (jc *JobController) CleanupJob(runPolicy *apiv1.RunPolicy, jobStatus apiv1.JobStatus, job interface{}) error { - currentTime := time.Now() - metaObject, _ := job.(metav1.Object) - ttl := runPolicy.TTLSecondsAfterFinished - if ttl == nil || trainutil.IsJobSuspended(runPolicy) { - return nil - } - duration := time.Second * time.Duration(*ttl) - if jobStatus.CompletionTime == nil { - return fmt.Errorf("job completion time is nil, cannot cleanup") - } - finishTime := jobStatus.CompletionTime - expireTime := finishTime.Add(duration) - if currentTime.After(expireTime) { - err := jc.Controller.DeleteJob(job) - if err != nil { - commonutil.LoggerForJob(metaObject).Warnf("Cleanup Job error: %v.", err) - return err - } - return nil - } else { - if finishTime.After(currentTime) { - commonutil.LoggerForJob(metaObject).Warnf("Found Job finished in the future. This is likely due to time skew in the cluster. 
Job cleanup will be deferred.") - } - remaining := expireTime.Sub(currentTime) - key, err := KeyFunc(job) - if err != nil { - commonutil.LoggerForJob(metaObject).Warnf("Couldn't get key for job object: %v", err) - return err - } - jc.WorkQueue.AddAfter(key, remaining) - return nil - } -} - -func (jc *JobController) calcPGMinResources(minMember int32, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) *corev1.ResourceList { - return CalcPGMinResources(minMember, replicas, jc.PriorityClassLister.Get) -} - -func (jc *JobController) ManagedByExternalController(controllerName *string) *string { - if controllerName != nil && *controllerName != apiv1.KubeflowJobsController { - return controllerName - } - return nil -} diff --git a/pkg/controller.v1/common/job_controller.go b/pkg/controller.v1/common/job_controller.go deleted file mode 100644 index a4ca4a5815..0000000000 --- a/pkg/controller.v1/common/job_controller.go +++ /dev/null @@ -1,263 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package common - -import ( - "strings" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/common" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - log "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - kubeinformers "k8s.io/client-go/informers" - kubeclientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/kubernetes/scheme" - typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" - corelisters "k8s.io/client-go/listers/core/v1" - schedulinglisters "k8s.io/client-go/listers/scheduling/v1" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/tools/record" - "k8s.io/client-go/util/workqueue" - "sigs.k8s.io/controller-runtime/pkg/client" - volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned" -) - -var ( - // KeyFunc is the short name to DeletionHandlingMetaNamespaceKeyFunc. - // IndexerInformer uses a delta queue, therefore for deletes we have to use this - // key function but it should be just fine for non delete events. 
- KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc - - createdPodGroupsCount = promauto.NewCounter(prometheus.CounterOpts{ - Name: "created_pod_groups_total", - Help: "The total number of created pod groups", - }) - deletedPodGroupsCount = promauto.NewCounter(prometheus.CounterOpts{ - Name: "deleted_pod_groups_total", - Help: "The total number of deleted pod groups", - }) -) - -type GangScheduler string - -const ( - GangSchedulerNone GangScheduler = "None" - GangSchedulerVolcano GangScheduler = "volcano" - // GangSchedulerSchedulerPlugins Using this scheduler name or any scheduler name different than volcano uses the scheduler-plugins PodGroup - GangSchedulerSchedulerPlugins GangScheduler = "scheduler-plugins" -) - -// JobControllerConfiguration contains configuration of operator. -type JobControllerConfiguration struct { - // GangScheduling choice: None, volcano and scheduler-plugins - GangScheduling GangScheduler -} - -func (c *JobControllerConfiguration) EnableGangScheduling() bool { - return c.GangScheduling != "" && c.GangScheduling != GangSchedulerNone -} - -// JobController abstracts other operators to manage the lifecycle of Jobs. -// User need to first implement the ControllerInterface(objectA) and then initialize a JobController(objectB) struct with objectA -// as the parameter. -// And then call objectB.ReconcileJobs as mentioned below, the ReconcileJobs method is the entrypoint to trigger the -// reconcile logic of the job controller -// -// ReconcileJobs( -// -// job interface{}, -// replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, -// jobStatus apiv1.JobStatus, -// runPolicy *apiv1.RunPolicy) error -type JobController struct { - Controller common.ControllerInterface - - Config JobControllerConfiguration - - // PodControl is used to add or delete pods. - PodControl control.PodControlInterface - - // ServiceControl is used to add or delete services. 
- ServiceControl control.ServiceControlInterface - - // KubeClientSet is a standard kubernetes clientset. - KubeClientSet kubeclientset.Interface - - // PodGroupControl is used to add or delete PodGroup. - PodGroupControl control.PodGroupControlInterface - - // PodLister can list/get pods from the shared informer's store. - PodLister corelisters.PodLister - - // ServiceLister can list/get services from the shared informer's store. - ServiceLister corelisters.ServiceLister - - // PriorityClassLister can list/get priorityClasses from the shared informer's store. - PriorityClassLister schedulinglisters.PriorityClassLister - - // PodInformerSynced returns true if the pod store has been synced at least once. - PodInformerSynced cache.InformerSynced - - // ServiceInformerSynced returns true if the service store has been synced at least once. - ServiceInformerSynced cache.InformerSynced - - // PriorityClassInformerSynced returns true if the priority class store has been synced at least once. - PriorityClassInformerSynced cache.InformerSynced - - // A TTLCache of pod/services creates/deletes each job expects to see - // We use Job namespace/name + ReplicaType + pods/services as an expectation key, - // For example, there is a TFJob with namespace "tf-operator" and name "tfjob-abc": - // { - // "PS": { - // "Replicas": 2, - // }, - // "Worker": { - // "Replicas": 4, - // } - // } - // We will create 4 expectations: - // - "tf-operator/tfjob-abc/ps/services", expects 2 adds. - // - "tf-operator/tfjob-abc/ps/pods", expects 2 adds. - // - "tf-operator/tfjob-abc/worker/services", expects 4 adds. - // - "tf-operator/tfjob-abc/worker/pods", expects 4 adds. - Expectations expectation.ControllerExpectationsInterface - - // WorkQueue is a rate limited work queue. This is used to queue work to be - // processed instead of performing it as soon as a change happens. 
This - // means we can ensure we only process a fixed amount of resources at a - // time, and makes it easy to ensure we are never processing the same item - // simultaneously in two different workers. - WorkQueue workqueue.TypedRateLimitingInterface[string] - - // Recorder is an event recorder for recording Event resources to the - // Kubernetes API. - Recorder record.EventRecorder -} - -type GangSchedulingSetupFunc func(jc *JobController) - -var GenVolcanoSetupFunc = func(vci volcanoclient.Interface) GangSchedulingSetupFunc { - return func(jc *JobController) { - jc.Config.GangScheduling = GangSchedulerVolcano - jc.PodGroupControl = control.NewVolcanoControl(vci) - } -} - -var GenSchedulerPluginsSetupFunc = func(c client.Client, gangSchedulerName string) GangSchedulingSetupFunc { - return func(jc *JobController) { - jc.Config.GangScheduling = GangScheduler(gangSchedulerName) - jc.PodGroupControl = control.NewSchedulerPluginsControl(c, gangSchedulerName) - } -} - -var GenNonGangSchedulerSetupFunc = func() GangSchedulingSetupFunc { - return func(jc *JobController) { - jc.Config.GangScheduling = "" - jc.PodGroupControl = nil - } -} - -func NewJobController( - controllerImpl common.ControllerInterface, - reconcilerSyncPeriod metav1.Duration, - kubeClientSet kubeclientset.Interface, - setupPodGroup GangSchedulingSetupFunc, - kubeInformerFactory kubeinformers.SharedInformerFactory, - workQueueName string) JobController { - - log.Debug("Creating event broadcaster") - eventBroadcaster := record.NewBroadcaster() - eventBroadcaster.StartLogging(log.Infof) - eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClientSet.CoreV1().Events("")}) - recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerImpl.ControllerName()}) - - podControl := control.RealPodControl{ - KubeClient: kubeClientSet, - Recorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: 
controllerImpl.ControllerName()}), - } - - serviceControl := control.RealServiceControl{ - KubeClient: kubeClientSet, - Recorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerImpl.ControllerName()}), - } - - jc := JobController{ - Controller: controllerImpl, - Config: JobControllerConfiguration{}, - PodControl: podControl, - ServiceControl: serviceControl, - KubeClientSet: kubeClientSet, - Expectations: expectation.NewControllerExpectations(), - WorkQueue: workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.DefaultTypedControllerRateLimiter[string](), - workqueue.TypedRateLimitingQueueConfig[string]{Name: workQueueName}), - Recorder: recorder, - } - - setupPodGroup(&jc) - - return jc - -} - -func (jc *JobController) GenOwnerReference(obj metav1.Object) *metav1.OwnerReference { - boolPtr := func(b bool) *bool { return &b } - controllerRef := &metav1.OwnerReference{ - APIVersion: jc.Controller.GetAPIGroupVersion().String(), - Kind: jc.Controller.GetAPIGroupVersionKind().Kind, - Name: obj.GetName(), - UID: obj.GetUID(), - BlockOwnerDeletion: boolPtr(true), - Controller: boolPtr(true), - } - - return controllerRef -} - -func (jc *JobController) GenLabels(jobName string) map[string]string { - jobName = strings.Replace(jobName, "/", "-", -1) - return map[string]string{ - apiv1.OperatorNameLabel: jc.Controller.ControllerName(), - apiv1.JobNameLabel: jobName, - } -} - -// resolveControllerRef returns the job referenced by a ControllerRef, -// or nil if the ControllerRef could not be resolved to a matching job -// of the correct Kind. -func (jc *JobController) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) metav1.Object { - // We can't look up by UID, so look up by Name and then verify UID. - // Don't even try to look up by Name if it's the wrong Kind. 
- if controllerRef.Kind != jc.Controller.GetAPIGroupVersionKind().Kind { - return nil - } - job, err := jc.Controller.GetJobFromInformerCache(namespace, controllerRef.Name) - if err != nil { - return nil - } - if job.GetUID() != controllerRef.UID { - // The controller we found with this Name is not the same one that the - // ControllerRef points to. - return nil - } - return job -} diff --git a/pkg/controller.v1/common/job_test.go b/pkg/controller.v1/common/job_test.go deleted file mode 100644 index ca948b788b..0000000000 --- a/pkg/controller.v1/common/job_test.go +++ /dev/null @@ -1,286 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package common - -import ( - "context" - "testing" - "time" - - "github.com/google/go-cmp/cmp" - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/kubernetes/fake" - "k8s.io/client-go/tools/record" - "k8s.io/utils/ptr" -) - -func TestDeletePodsAndServices(T *testing.T) { - pods := []runtime.Object{ - newPod("runningPod", corev1.PodRunning), - newPod("succeededPod", corev1.PodSucceeded), - } - services := []runtime.Object{ - newService("runningPod"), - newService("succeededPod"), - } - - cases := map[string]struct { - cleanPodPolicy apiv1.CleanPodPolicy - jobCondition apiv1.JobConditionType - wantPods *corev1.PodList - wantService *corev1.ServiceList - }{ - "Succeeded job and cleanPodPolicy is Running": { - cleanPodPolicy: apiv1.CleanPodPolicyRunning, - jobCondition: apiv1.JobSucceeded, - wantPods: &corev1.PodList{ - Items: []corev1.Pod{ - *pods[1].(*corev1.Pod), - }, - }, - wantService: &corev1.ServiceList{ - Items: []corev1.Service{ - *services[1].(*corev1.Service), - }, - }, - }, - "Suspended job and cleanPodPolicy is Running": { - cleanPodPolicy: apiv1.CleanPodPolicyRunning, - jobCondition: apiv1.JobSuspended, - wantPods: &corev1.PodList{}, - wantService: &corev1.ServiceList{}, - }, - "Finished job and cleanPodPolicy is All": { - cleanPodPolicy: apiv1.CleanPodPolicyAll, - jobCondition: apiv1.JobSucceeded, - wantPods: &corev1.PodList{}, - wantService: &corev1.ServiceList{}, - }, - "Finished job and cleanPodPolicy is None": { - cleanPodPolicy: apiv1.CleanPodPolicyNone, - jobCondition: apiv1.JobFailed, - wantPods: &corev1.PodList{ - Items: []corev1.Pod{ - *pods[0].(*corev1.Pod), - *pods[1].(*corev1.Pod), - }, - }, - wantService: &corev1.ServiceList{ - Items: 
[]corev1.Service{ - *services[0].(*corev1.Service), - *services[1].(*corev1.Service), - }, - }, - }, - "Suspended job and cleanPodPolicy is None": { - cleanPodPolicy: apiv1.CleanPodPolicyNone, - jobCondition: apiv1.JobSuspended, - wantPods: &corev1.PodList{}, - wantService: &corev1.ServiceList{}, - }, - } - for name, tc := range cases { - T.Run(name, func(t *testing.T) { - fakeClient := fake.NewSimpleClientset(append(pods, services...)...) - jobController := JobController{ - PodControl: control.RealPodControl{KubeClient: fakeClient, Recorder: &record.FakeRecorder{}}, - ServiceControl: control.RealServiceControl{KubeClient: fakeClient, Recorder: &record.FakeRecorder{}}, - } - - var inPods []*corev1.Pod - for i := range pods { - inPods = append(inPods, pods[i].(*corev1.Pod)) - } - runPolicy := &apiv1.RunPolicy{ - CleanPodPolicy: &tc.cleanPodPolicy, - } - jobStatus := apiv1.JobStatus{ - Conditions: []apiv1.JobCondition{ - { - Type: tc.jobCondition, - Status: corev1.ConditionTrue, - }, - }, - } - if err := jobController.DeletePodsAndServices(&testjobv1.TestJob{}, runPolicy, jobStatus, inPods); err != nil { - T.Errorf("Failed to delete pods and services: %v", err) - } - gotPods, err := fakeClient.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{}) - if err != nil { - t.Errorf("Failed to list pods: %v", err) - } - if diff := cmp.Diff(tc.wantPods, gotPods); len(diff) != 0 { - t.Errorf("Unexpected pods after running DeletePodsAndServices (-want,+got):%s\n", diff) - } - gotServices, err := fakeClient.CoreV1().Services("").List(context.Background(), metav1.ListOptions{}) - if err != nil { - t.Errorf("Failed to list services: %v", err) - } - if diff := cmp.Diff(tc.wantService, gotServices); len(diff) != 0 { - t.Errorf("Unexpected services after running DeletePodsAndServices (-want,+got):%s\n", diff) - } - }) - } -} - -func TestPastBackoffLimit(T *testing.T) { - backoffLimitExceededPod := newPod("runningPodWithBackoff", corev1.PodRunning) - 
backoffLimitExceededPod.Status.ContainerStatuses = []corev1.ContainerStatus{ - {RestartCount: 3}, - } - allPods := []*corev1.Pod{ - newPod("runningPod", corev1.PodRunning), - newPod("succeededPod", corev1.PodSucceeded), - backoffLimitExceededPod, - } - cases := map[string]struct { - pods []*corev1.Pod - backOffLimit int32 - wantPastBackOffLimit bool - }{ - "backOffLimit is 0": { - pods: allPods[:2], - backOffLimit: 0, - wantPastBackOffLimit: false, - }, - "backOffLimit is 3": { - pods: allPods, - backOffLimit: 3, - wantPastBackOffLimit: true, - }, - } - for name, tc := range cases { - T.Run(name, func(t *testing.T) { - jobController := JobController{} - runPolicy := &apiv1.RunPolicy{ - BackoffLimit: &tc.backOffLimit, - } - replica := map[apiv1.ReplicaType]*apiv1.ReplicaSpec{ - "test": {RestartPolicy: apiv1.RestartPolicyOnFailure}, - } - got, err := jobController.PastBackoffLimit("test-job", runPolicy, replica, tc.pods) - if err != nil { - t.Errorf("Failaed to do PastBackoffLimit: %v", err) - } - if tc.wantPastBackOffLimit != got { - t.Errorf("Unexpected pastBackoffLimit: \nwant: %v\ngot: %v\n", tc.wantPastBackOffLimit, got) - } - }) - } -} - -func TestPastActiveDeadline(T *testing.T) { - cases := map[string]struct { - activeDeadlineSeconds int64 - wantPastActiveDeadlineSeconds bool - }{ - "activeDeadlineSeconds is 0": { - activeDeadlineSeconds: 0, - wantPastActiveDeadlineSeconds: true, - }, - "activeDeadlineSeconds is 2": { - activeDeadlineSeconds: 2, - wantPastActiveDeadlineSeconds: false, - }, - } - for name, tc := range cases { - T.Run(name, func(t *testing.T) { - jobController := JobController{} - runPolicy := &apiv1.RunPolicy{ - ActiveDeadlineSeconds: &tc.activeDeadlineSeconds, - } - jobStatus := apiv1.JobStatus{ - StartTime: &metav1.Time{ - Time: time.Now(), - }, - } - if got := jobController.PastActiveDeadline(runPolicy, jobStatus); tc.wantPastActiveDeadlineSeconds != got { - t.Errorf("Unexpected PastActiveDeadline: \nwant: %v\ngot: %v\n", 
tc.wantPastActiveDeadlineSeconds, got) - } - }) - } -} - -func TestManagedByExternalController(T *testing.T) { - cases := map[string]struct { - managedBy *string - wantControllerName *string - }{ - "managedBy is nil": { - managedBy: nil, - wantControllerName: nil, - }, - "managedBy is empty": { - managedBy: ptr.To[string](""), - wantControllerName: ptr.To[string](""), - }, - "managedBy is training-operator controller": { - managedBy: ptr.To[string](apiv1.KubeflowJobsController), - wantControllerName: nil, - }, - "managedBy is not the training-operator controller": { - managedBy: ptr.To[string]("kueue.x-k8s.io/multikueue"), - wantControllerName: ptr.To[string]("kueue.x-k8s.io/multikueue"), - }, - "managedBy is other value": { - managedBy: ptr.To[string]("other-job-controller"), - wantControllerName: ptr.To[string]("other-job-controller"), - }, - } - for name, tc := range cases { - T.Run(name, func(t *testing.T) { - jobController := JobController{} - runPolicy := &apiv1.RunPolicy{ - ManagedBy: tc.managedBy, - } - - gotControllerName := jobController.ManagedByExternalController(runPolicy.ManagedBy) - if diff := cmp.Diff(tc.wantControllerName, gotControllerName); diff != "" { - t.Errorf("Unexpected manager controller (-want +got):\n%s", diff) - } - }) - } -} - -func newPod(name string, phase corev1.PodPhase) *corev1.Pod { - pod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Labels: map[string]string{ - apiv1.ReplicaTypeLabel: "test", - }, - }, - Status: corev1.PodStatus{ - Phase: phase, - }, - } - return pod -} - -func newService(name string) *corev1.Service { - service := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - }, - } - return service -} diff --git a/pkg/controller.v1/common/pod.go b/pkg/controller.v1/common/pod.go deleted file mode 100644 index fda1a2e528..0000000000 --- a/pkg/controller.v1/common/pod.go +++ /dev/null @@ -1,483 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, 
Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package common - -import ( - "fmt" - "reflect" - "strconv" - "strings" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - "github.com/kubeflow/training-operator/pkg/core" - commonutil "github.com/kubeflow/training-operator/pkg/util" - utillabels "github.com/kubeflow/training-operator/pkg/util/labels" - trainutil "github.com/kubeflow/training-operator/pkg/util/train" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - log "github.com/sirupsen/logrus" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/client-go/tools/cache" - "k8s.io/klog/v2" -) - -const ( - // podTemplateRestartPolicyReason is the warning reason when the restart - // policy is set in pod template. - podTemplateRestartPolicyReason = "SetPodTemplateRestartPolicy" - // exitedWithCodeReason is the normal reason when the pod is exited because of the exit code. 
- exitedWithCodeReason = "ExitedWithCode" - // podTemplateSchedulerNameReason is the warning reason when other scheduler name is set - // in pod templates with gang-scheduling enabled - podTemplateSchedulerNameReason = "SetPodTemplateSchedulerName" -) - -var ( - // Prometheus metrics - createdPodsCount = promauto.NewCounter(prometheus.CounterOpts{ - Name: "created_pods_total", - Help: "The total number of created pods", - }) - deletedPodsCount = promauto.NewCounter(prometheus.CounterOpts{ - Name: "deleted_pods_total", - Help: "The total number of deleted pods", - }) - failedPodsCount = promauto.NewCounter(prometheus.CounterOpts{ - Name: "failed_pods_total", - Help: "The total number of failed pods", - }) -) - -// When a pod is created, enqueue the job that manages it and update its expectations. -func (jc *JobController) AddPod(obj interface{}) { - pod := obj.(*v1.Pod) - if pod.DeletionTimestamp != nil { - // on a restart of the controller controller, it's possible a new pod shows up in a state that - // is already pending deletion. Prevent the pod from being a creation observation. - // jc.deletePod(pod) - return - } - - // If it has a ControllerRef, that's all that matters. 
- if controllerRef := metav1.GetControllerOf(pod); controllerRef != nil { - job := jc.resolveControllerRef(pod.Namespace, controllerRef) - - logger := commonutil.LoggerForPod(pod, jc.Controller.GetAPIGroupVersionKind().Kind) - - if job == nil { - if utillabels.HasKnownLabels(pod.Labels, jc.Controller.GetGroupNameLabelValue()) { - logger.Info("This pod's job does not exist") - } - return - } - - jobKey, err := KeyFunc(job) - if err != nil { - logger.Infof("Failed to get the jobkey: %v", err) - return - } - - rType, err := utillabels.ReplicaType(pod.Labels) - if err != nil { - logger.Infof("This pod maybe not created by %v", jc.Controller.ControllerName()) - return - } - - expectationPodsKey := expectation.GenExpectationPodsKey(jobKey, string(rType)) - - jc.Expectations.CreationObserved(expectationPodsKey) - // TODO: we may need add backoff here - jc.WorkQueue.Add(jobKey) - - return - } - -} - -// When a pod is updated, figure out what job is managing it and wake it up. -// If the labels of the pod have changed we need to awaken both the old -// and new replica set. old and cur must be *v1.Pod types. -func (jc *JobController) UpdatePod(old, cur interface{}) { - curPod := cur.(*v1.Pod) - oldPod := old.(*v1.Pod) - if curPod.ResourceVersion == oldPod.ResourceVersion { - // Periodic resync will send update events for all known pods. - // Two different versions of the same pod will always have different RVs. - return - } - - logger := commonutil.LoggerForPod(curPod, jc.Controller.GetAPIGroupVersionKind().Kind) - curControllerRef := metav1.GetControllerOf(curPod) - oldControllerRef := metav1.GetControllerOf(oldPod) - controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef) - if controllerRefChanged && oldControllerRef != nil { - // The ControllerRef was changed. Sync the old controller, if any. 
- if job := jc.resolveControllerRef(oldPod.Namespace, oldControllerRef); job != nil { - logger.Infof("pod ControllerRef updated: %v, %v", curPod, oldPod) - jobKey, err := KeyFunc(job) - if err != nil { - return - } - // TODO: we may need add backoff here - jc.WorkQueue.Add(jobKey) - } - } - - // If it has a ControllerRef, that's all that matters. - if curControllerRef != nil { - job := jc.resolveControllerRef(curPod.Namespace, curControllerRef) - if job == nil { - return - } - logger.Debugf("pod has a ControllerRef: %v, %v", curPod, oldPod) - jobKey, err := KeyFunc(job) - if err != nil { - return - } - // TODO: we may need add backoff here - jc.WorkQueue.Add(jobKey) - return - } -} - -// When a pod is deleted, enqueue the job that manages the pod and update its expectations. -// obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item. -func (jc *JobController) DeletePod(obj interface{}) { - pod, ok := obj.(*v1.Pod) - - logger := commonutil.LoggerForPod(pod, jc.Controller.GetAPIGroupVersionKind().Kind) - - // When delete is dropped, the relist will notice a pod in the store not - // in the list, leading to the insertion of a tombstone object which contains - // the deleted key/value. Note that this value might be stale. If the pod - // changed labels the new job will not be woken up till the periodic resync. - if !ok { - tombstone, ok := obj.(cache.DeletedFinalStateUnknown) - if !ok { - utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %+v", obj)) - return - } - pod, ok = tombstone.Obj.(*v1.Pod) - if !ok { - utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a pod %+v", obj)) - return - } - } - - controllerRef := metav1.GetControllerOf(pod) - if controllerRef == nil { - // No controller should care about orphans being deleted. 
- return - } - job := jc.resolveControllerRef(pod.Namespace, controllerRef) - if job == nil { - return - } - jobKey, err := KeyFunc(job) - if err != nil { - return - } - - rType, err := utillabels.ReplicaType(pod.Labels) - if err != nil { - logger.Infof("This pod maybe not created by %v", jc.Controller.ControllerName()) - return - } - - expectationPodsKey := expectation.GenExpectationPodsKey(jobKey, string(rType)) - - jc.Expectations.DeletionObserved(expectationPodsKey) - deletedPodsCount.Inc() - // TODO: we may need add backoff here - jc.WorkQueue.Add(jobKey) -} - -// getPodsForJob returns the set of pods that this job should manage. -// It also reconciles ControllerRef by adopting/orphaning. -// Note that the returned Pods are pointers into the cache. -func (jc *JobController) GetPodsForJob(jobObject interface{}) ([]*v1.Pod, error) { - job, ok := jobObject.(metav1.Object) - if !ok { - return nil, fmt.Errorf("job is not of type metav1.Object") - } - - // Create selector. - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: jc.GenLabels(job.GetName()), - }) - - if err != nil { - return nil, fmt.Errorf("couldn't convert Job selector: %v", err) - } - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. - pods, err := jc.PodLister.Pods(job.GetNamespace()).List(labels.Everything()) - if err != nil { - return nil, err - } - - // If any adoptions are attempted, we should first recheck for deletion - // with an uncached quorum read sometime after listing Pods (see #42639). 
- canAdoptFunc := RecheckDeletionTimestamp(func() (metav1.Object, error) { - fresh, err := jc.Controller.GetJobFromAPIClient(job.GetNamespace(), job.GetName()) - if err != nil { - return nil, err - } - if fresh.GetUID() != job.GetUID() { - return nil, fmt.Errorf("original Job %v/%v is gone: got uid %v, wanted %v", job.GetNamespace(), job.GetName(), fresh.GetUID(), job.GetUID()) - } - return fresh, nil - }) - cm := control.NewPodControllerRefManager(jc.PodControl, job, selector, jc.Controller.GetAPIGroupVersionKind(), canAdoptFunc) - return cm.ClaimPods(pods) -} - -// FilterPodsForReplicaType returns pods belong to a replicaType. -func (jc *JobController) FilterPodsForReplicaType(pods []*v1.Pod, replicaType string) ([]*v1.Pod, error) { - return core.FilterPodsForReplicaType(pods, replicaType) -} - -// getPodSlices returns a slice, which element is the slice of pod. -// It gives enough information to caller to make decision to up/down scale resources. -func (jc *JobController) GetPodSlices(pods []*v1.Pod, replicas int, logger *log.Entry) [][]*v1.Pod { - return core.GetPodSlices(pods, replicas, logger) -} - -// ReconcilePods checks and updates pods for each given ReplicaSpec. -// It will requeue the job in case of an error while creating/deleting pods. 
-func (jc *JobController) ReconcilePods( - job interface{}, - jobStatus *apiv1.JobStatus, - pods []*v1.Pod, - rType apiv1.ReplicaType, - spec *apiv1.ReplicaSpec, - replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) error { - - rt := strings.ToLower(string(rType)) - metaObject, ok := job.(metav1.Object) - if !ok { - return fmt.Errorf("job is not a metav1.Object type") - } - runtimeObject, ok := job.(runtime.Object) - if !ok { - return fmt.Errorf("job is not a runtime.Object type") - } - jobKey, err := KeyFunc(metaObject) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get key for job object %#v: %v", job, err)) - return err - } - jobKind := jc.Controller.GetAPIGroupVersionKind().Kind - expectationPodsKey := expectation.GenExpectationPodsKey(jobKey, rt) - - // Convert ReplicaType to lower string. - logger := commonutil.LoggerForReplica(metaObject, rt) - // Get all pods for the type rt. - pods, err = jc.FilterPodsForReplicaType(pods, rt) - if err != nil { - return err - } - numReplicas := int(*spec.Replicas) - var masterRole bool - - initializeReplicaStatuses(jobStatus, rType) - - // GetPodSlices will return enough information here to make decision to add/remove/update resources. - // - // For example, let's assume we have pods with replica-index 0, 1, 2 - // If replica is 4, return a slice with size 4. [[0],[1],[2],[]], a pod with replica-index 3 will be created. - // - // If replica is 1, return a slice with size 3. [[0],[1],[2]], pod with replica-index 1 and 2 are out of range and will be deleted. 
- podSlices := jc.GetPodSlices(pods, numReplicas, logger) - for index, podSlice := range podSlices { - if len(podSlice) > 1 { - logger.Warningf("We have too many pods for %s %d", rt, index) - } else if len(podSlice) == 0 { - logger.Infof("Need to create new pod: %s-%d", rt, index) - - // check if this replica is the master role - masterRole = jc.Controller.IsMasterRole(replicas, rType, index) - err = jc.createNewPod(job, rt, index, spec, masterRole, replicas) - if err != nil { - return err - } - } else { - // Check the status of the current pod. - pod := podSlice[0] - - // check if the index is in the valid range, if not, we should kill the pod - if index < 0 || index >= numReplicas { - err = jc.PodControl.DeletePod(pod.Namespace, pod.Name, runtimeObject) - if err != nil { - return err - } - // Deletion is expected - jc.Expectations.RaiseExpectations(expectationPodsKey, 0, 1) - } - - // Get the exit code of the container. - var exitCode int32 = 0xbeef // magic number - for _, status := range pod.Status.ContainerStatuses { - state := status.State - if status.Name == jc.Controller.GetDefaultContainerName() && state.Terminated != nil { - exitCode = state.Terminated.ExitCode - logger.Infof("Pod: %v.%v exited with code %v", pod.Namespace, pod.Name, exitCode) - jc.Recorder.Eventf(runtimeObject, v1.EventTypeNormal, exitedWithCodeReason, "Pod: %v.%v exited with code %v", pod.Namespace, pod.Name, exitCode) - } - } - // Check if the pod is retryable. 
- if pod.Status.Phase == v1.PodFailed { - failedPodsCount.Inc() - if spec.RestartPolicy == apiv1.RestartPolicyExitCode && trainutil.IsRetryableExitCode(exitCode) || - spec.RestartPolicy == apiv1.RestartPolicyOnFailure || - spec.RestartPolicy == apiv1.RestartPolicyAlways { - logger.Infof("Need to restart the pod: %v.%v", pod.Namespace, pod.Name) - if err := jc.PodControl.DeletePod(pod.Namespace, pod.Name, runtimeObject); err != nil { - return err - } - // Deletion is expected - jc.Expectations.RaiseExpectations(expectationPodsKey, 0, 1) - - msg := fmt.Sprintf("job %s is restarting because %s replica(s) failed.", - metaObject.GetName(), rType) - jc.Recorder.Event(runtimeObject, v1.EventTypeWarning, commonutil.NewReason(jobKind, commonutil.JobRestartingReason), msg) - commonutil.UpdateJobConditions(jobStatus, apiv1.JobRestarting, v1.ConditionTrue, commonutil.NewReason(jobKind, commonutil.JobRestartingReason), msg) - trainingoperatorcommon.RestartedJobsCounterInc(metaObject.GetNamespace(), jc.Controller.GetFrameworkName()) - } else if spec.RestartPolicy == apiv1.RestartPolicyExitCode && !trainutil.IsRetryableExitCode(exitCode) { - logger.Infof("Pod %q has a non-retryable exit code. Failing job.", klog.KObj(pod)) - msg := fmt.Sprintf("job %q is failing because %q replica(s) failed.", - metaObject.GetName(), rType) - jc.Recorder.Event(runtimeObject, v1.EventTypeWarning, commonutil.NewReason(jobKind, commonutil.JobFailedReason), msg) - commonutil.UpdateJobConditions(jobStatus, apiv1.JobFailed, v1.ConditionTrue, commonutil.NewReason(jobKind, commonutil.JobFailedReason), msg) - } - } - - updateJobReplicaStatuses(jobStatus, rType, pod) - } - } - return nil -} - -// createNewPod creates a new pod for the given index and type. 
-func (jc *JobController) createNewPod(job interface{}, rt string, index int, spec *apiv1.ReplicaSpec, masterRole bool, - replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) error { - - metaObject, ok := job.(metav1.Object) - if !ok { - return fmt.Errorf("job is not a metav1.Object type") - } - runtimeObject, ok := job.(runtime.Object) - if !ok { - return fmt.Errorf("job is not a runtime.Object type") - } - jobKey, err := KeyFunc(metaObject) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get key for job object %#v: %v", job, err)) - return err - } - logger := commonutil.LoggerForReplica(metaObject, rt) - - // Set type and index for the worker. - labels := jc.GenLabels(metaObject.GetName()) - utillabels.SetReplicaType(labels, rt) - utillabels.SetReplicaIndex(labels, index) - - if masterRole { - utillabels.SetJobRole(labels, "master") - } - - podTemplate := spec.Template.DeepCopy() - - idxStr := strconv.Itoa(index) - // Set name for the template. - podTemplate.Name = GenGeneralName(metaObject.GetName(), rt, idxStr) - - if podTemplate.Labels == nil { - podTemplate.Labels = make(map[string]string) - } - - for key, value := range labels { - podTemplate.Labels[key] = value - } - - if err := jc.Controller.SetClusterSpec(job, podTemplate, rt, idxStr); err != nil { - return err - } - - // Submit a warning event if the user specifies restart policy for - // the pod template. We recommend to set it from the replica level. - if podTemplate.Spec.RestartPolicy != v1.RestartPolicy("") { - errMsg := "Restart policy in pod template will be overwritten by restart policy in replica spec" - logger.Warning(errMsg) - jc.Recorder.Event(runtimeObject, v1.EventTypeWarning, podTemplateRestartPolicyReason, errMsg) - } - core.SetRestartPolicy(podTemplate, spec) - - // if gang-scheduling is enabled: - // 1. if user has specified other scheduler, we report a warning without overriding any fields. - // 2. 
if no SchedulerName is set for pods, we set the SchedulerName to gang-scheduler-name. - if jc.Config.EnableGangScheduling() { - if isCustomSchedulerSet(replicas, jc.PodGroupControl.GetSchedulerName()) { - errMsg := "Another scheduler is specified when gang-scheduling is enabled and it will not be overwritten" - logger.Warning(errMsg) - jc.Recorder.Event(runtimeObject, v1.EventTypeWarning, podTemplateSchedulerNameReason, errMsg) - } - jc.PodGroupControl.DecoratePodTemplateSpec(podTemplate, metaObject, rt) - } - - // Creation is expected when there is no error returned - // We use `RaiseExpectations` here to accumulate expectations since `SetExpectations` has no such kind of ability - expectationPodsKey := expectation.GenExpectationPodsKey(jobKey, rt) - jc.Expectations.RaiseExpectations(expectationPodsKey, 1, 0) - - controllerRef := jc.GenOwnerReference(metaObject) - err = jc.PodControl.CreatePodsWithControllerRef(metaObject.GetNamespace(), podTemplate, runtimeObject, controllerRef) - if err != nil && errors.IsTimeout(err) { - // Pod is created but its initialization has timed out. - // If the initialization is successful eventually, the - // controller will observe the creation via the informer. - // If the initialization fails, or if the pod keeps - // uninitialized for a long time, the informer will not - // receive any update, and the controller will create a new - // pod when the expectation expires. 
- return nil - } else if err != nil { - // Since error occurred(the informer won't observe this pod), - // we decrement the expected number of creates - // and wait until next reconciliation - jc.Expectations.CreationObserved(expectationPodsKey) - return err - } - createdPodsCount.Inc() - return nil -} - -func isCustomSchedulerSet(replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, gangSchedulerName string) bool { - for _, spec := range replicas { - if spec.Template.Spec.SchedulerName != "" && spec.Template.Spec.SchedulerName != gangSchedulerName { - return true - } - } - return false -} diff --git a/pkg/controller.v1/common/pod_test.go b/pkg/controller.v1/common/pod_test.go deleted file mode 100644 index 6a23810604..0000000000 --- a/pkg/controller.v1/common/pod_test.go +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package common - -import ( - "testing" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/core" - - "github.com/stretchr/testify/assert" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestSetRestartPolicy(t *testing.T) { - testCases := map[string]struct { - replicaSpec *apiv1.ReplicaSpec - expectedRestartPolicy v1.RestartPolicy - }{ - "restartPolicy is ExitCode": { - replicaSpec: &apiv1.ReplicaSpec{ - RestartPolicy: apiv1.RestartPolicyExitCode, - }, - expectedRestartPolicy: v1.RestartPolicyNever, - }, - "restartPolicy is Never": { - replicaSpec: &apiv1.ReplicaSpec{ - RestartPolicy: apiv1.RestartPolicyNever, - }, - expectedRestartPolicy: v1.RestartPolicyNever, - }, - "restartPolicy is Always": { - replicaSpec: &apiv1.ReplicaSpec{ - RestartPolicy: apiv1.RestartPolicyAlways, - }, - expectedRestartPolicy: v1.RestartPolicyAlways, - }, - "restartPolicy is OnFailure": { - replicaSpec: &apiv1.ReplicaSpec{ - RestartPolicy: apiv1.RestartPolicyOnFailure, - }, - expectedRestartPolicy: v1.RestartPolicyOnFailure, - }, - } - for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - podTemplate := &tc.replicaSpec.Template - core.SetRestartPolicy(podTemplate, tc.replicaSpec) - if podTemplate.Spec.RestartPolicy != tc.expectedRestartPolicy { - t.Errorf("Unexpected restartPolicy from SetRetartPolicy:\nwant:%v\ngot:%v\n", tc.expectedRestartPolicy, podTemplate.Spec.RestartPolicy) - } - }) - } -} - -func TestIsCustomSchedulerSet(t *testing.T) { - testCases := map[string]struct { - replicaSpecs map[apiv1.ReplicaType]*apiv1.ReplicaSpec - gangSchedulerName string - want bool - }{ - "replicaSpecs aren't set custom schedulerName": { - replicaSpecs: map[apiv1.ReplicaType]*apiv1.ReplicaSpec{ - apiv1.ReplicaType("A"): {}, - apiv1.ReplicaType("B"): {}, - }, - gangSchedulerName: "alpha", - want: false, - }, - "all replicaSpecs are set custom schedulerName": { - replicaSpecs: 
map[apiv1.ReplicaType]*apiv1.ReplicaSpec{ - apiv1.ReplicaType("A"): { - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - SchedulerName: "custom-a", - }, - }, - }, - apiv1.ReplicaType("B"): { - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - SchedulerName: "custom-b", - }, - }, - }, - }, - gangSchedulerName: "beta", - want: true, - }, - "one of replicaSpecs is set custom schedulerName": { - replicaSpecs: map[apiv1.ReplicaType]*apiv1.ReplicaSpec{ - apiv1.ReplicaType("A"): {}, - apiv1.ReplicaType("B"): { - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - SchedulerName: "custom-b", - }, - }, - }, - }, - gangSchedulerName: "gamma", - want: true, - }, - } - for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - got := isCustomSchedulerSet(tc.replicaSpecs, tc.gangSchedulerName) - if tc.want != got { - t.Errorf("Unexpected value from isCustomSchedulerSet:\nwant:%v\ngot:%v\n", tc.want, got) - } - }) - } -} - -func TestCalculatePodSliceSize(t *testing.T) { - type testCase struct { - pods []*v1.Pod - replicas int - expectedSize int - } - - pods := []*v1.Pod{ - { - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{apiv1.ReplicaIndexLabel: "0"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{apiv1.ReplicaIndexLabel: "1"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{apiv1.ReplicaIndexLabel: "2"}, - }, - }, - } - - var testCases = []testCase{ - { - pods: pods, - replicas: 3, - expectedSize: 3, - }, - { - pods: pods, - replicas: 4, - expectedSize: 4, - }, - { - pods: pods, - replicas: 2, - expectedSize: 3, - }, - { - pods: append(pods, &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{apiv1.ReplicaIndexLabel: "4"}, - }, - }), - replicas: 3, - expectedSize: 5, - }, - } - - for _, tc := range testCases { - result := core.CalculatePodSliceSize(tc.pods, tc.replicas) - assert.Equal(t, tc.expectedSize, result) - } -} - -func TestFilterPodsForReplicaType(t *testing.T) { - 
pods := []*v1.Pod{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "a", - Labels: map[string]string{apiv1.ReplicaTypeLabel: "foo"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "b", - Labels: map[string]string{apiv1.ReplicaTypeLabel: "bar"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "c", - Labels: map[string]string{apiv1.ReplicaTypeLabel: "foo"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "d", - Labels: map[string]string{apiv1.ReplicaTypeLabel: "bar"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "e", - Labels: map[string]string{ - apiv1.ReplicaTypeLabel: "foo", - }, - }, - }, - } - c := &JobController{} - got, err := c.FilterPodsForReplicaType(pods, "foo") - if err != nil { - t.Fatalf("FilterPodsForReplicaType returned error: %v", err) - } - want := []*v1.Pod{pods[0], pods[2], pods[4]} - assert.Equal(t, want, got) -} diff --git a/pkg/controller.v1/common/scheduling.go b/pkg/controller.v1/common/scheduling.go deleted file mode 100644 index 05bd54e062..0000000000 --- a/pkg/controller.v1/common/scheduling.go +++ /dev/null @@ -1,93 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package common - -import ( - "fmt" - - "github.com/google/go-cmp/cmp" - log "github.com/sirupsen/logrus" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/klog/v2" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type FillPodGroupSpecFunc func(object metav1.Object) error - -func (jc *JobController) SyncPodGroup(job metav1.Object, specFunc FillPodGroupSpecFunc) (metav1.Object, error) { - pgctl := jc.PodGroupControl - - // Check whether podGroup exists or not - podGroup, err := pgctl.GetPodGroup(job.GetNamespace(), job.GetName()) - if err == nil { - // update podGroup for gang scheduling - oldPodGroup := &podGroup - if err = specFunc(podGroup); err != nil { - return nil, fmt.Errorf("unable to fill the spec of PodGroup, '%v': %v", klog.KObj(podGroup), err) - } - if diff := cmp.Diff(oldPodGroup, podGroup); len(diff) != 0 { - return podGroup, pgctl.UpdatePodGroup(podGroup.(client.Object)) - } - return podGroup, nil - } else if client.IgnoreNotFound(err) != nil { - return nil, fmt.Errorf("unable to get a PodGroup: %v", err) - } else { - // create podGroup for gang scheduling - newPodGroup := pgctl.NewEmptyPodGroup() - newPodGroup.SetName(job.GetName()) - newPodGroup.SetNamespace(job.GetNamespace()) - newPodGroup.SetAnnotations(job.GetAnnotations()) - newPodGroup.SetOwnerReferences([]metav1.OwnerReference{*jc.GenOwnerReference(job)}) - if err = specFunc(newPodGroup); err != nil { - return nil, fmt.Errorf("unable to fill the spec of PodGroup, '%v': %v", klog.KObj(newPodGroup), err) - } - - err = pgctl.CreatePodGroup(newPodGroup) - if err != nil { - return podGroup, fmt.Errorf("unable to create PodGroup: %v", err) - } - createdPodGroupsCount.Inc() - } - - createdPodGroup, err := pgctl.GetPodGroup(job.GetNamespace(), job.GetName()) - if err != nil { - return nil, fmt.Errorf("unable to get PodGroup after success creation: %v", err) - } - - return createdPodGroup, nil -} - -func (jc *JobController) 
DeletePodGroup(job metav1.Object) error { - pgctl := jc.PodGroupControl - - // Check whether podGroup exists or not - _, err := pgctl.GetPodGroup(job.GetNamespace(), job.GetName()) - if err != nil && k8serrors.IsNotFound(err) { - return nil - } - - log.Infof("Deleting PodGroup %s", job.GetName()) - - // Delete podGroup - err = pgctl.DeletePodGroup(job.GetNamespace(), job.GetName()) - if err != nil { - return fmt.Errorf("unable to delete PodGroup: %v", err) - } - deletedPodGroupsCount.Inc() - return nil -} diff --git a/pkg/controller.v1/common/service.go b/pkg/controller.v1/common/service.go deleted file mode 100644 index 0cc32f3dfc..0000000000 --- a/pkg/controller.v1/common/service.go +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package common - -import ( - "fmt" - "strconv" - "strings" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - "github.com/kubeflow/training-operator/pkg/core" - commonutil "github.com/kubeflow/training-operator/pkg/util" - utillabels "github.com/kubeflow/training-operator/pkg/util/labels" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - log "github.com/sirupsen/logrus" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" -) - -var ( - succeededServiceCreationCount = promauto.NewCounter(prometheus.CounterOpts{ - Name: "succeeded_service_creation_total", - Help: "The total number of succeeded service creation", - }) - failedServiceCreationCount = promauto.NewCounter(prometheus.CounterOpts{ - Name: "failed_service_creation_total", - Help: "The total number of failed service creation", - }) -) - -// When a service is created, enqueue the controller that manages it and update its expectations. -func (jc *JobController) AddService(obj interface{}) { - service := obj.(*v1.Service) - if service.DeletionTimestamp != nil { - // on a restart of the controller controller, it's possible a new service shows up in a state that - // is already pending deletion. Prevent the service from being a creation observation. - // tc.deleteService(service) - return - } - - // If it has a ControllerRef, that's all that matters. 
- if controllerRef := metav1.GetControllerOf(service); controllerRef != nil { - job := jc.resolveControllerRef(service.Namespace, controllerRef) - if job == nil { - return - } - - jobKey, err := KeyFunc(job) - if err != nil { - return - } - - rType, err := utillabels.ReplicaType(service.Labels) - if err != nil { - log.Infof("This service maybe not created by %v", jc.Controller.ControllerName()) - return - } - - expectationServicesKey := expectation.GenExpectationServicesKey(jobKey, string(rType)) - - jc.Expectations.CreationObserved(expectationServicesKey) - // TODO: we may need add backoff here - jc.WorkQueue.Add(jobKey) - - return - } - -} - -// When a service is updated, figure out what job/s manage it and wake them up. -// If the labels of the service have changed we need to awaken both the old -// and new replica set. old and cur must be *v1.Service types. -func (jc *JobController) UpdateService(old, cur interface{}) { - // TODO(CPH): handle this gracefully. -} - -// When a service is deleted, enqueue the job that manages the service and update its expectations. -// obj could be an *v1.Service, or a DeletionFinalStateUnknown marker item. -func (jc *JobController) DeleteService(obj interface{}) { - // TODO(CPH): handle this gracefully. -} - -// GetServicesForJob returns the set of services that this job should manage. -// It also reconciles ControllerRef by adopting/orphaning. -// Note that the returned services are pointers into the cache. 
-func (jc *JobController) GetServicesForJob(jobObject interface{}) ([]*v1.Service, error) { - job, ok := jobObject.(metav1.Object) - if !ok { - return nil, fmt.Errorf("job is not of type metav1.Object") - } - - // Create selector - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: jc.GenLabels(job.GetName()), - }) - - if err != nil { - return nil, fmt.Errorf("couldn't convert Job selector: %v", err) - } - // List all services to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. - services, err := jc.ServiceLister.Services(job.GetNamespace()).List(labels.Everything()) - if err != nil { - return nil, err - } - - // If any adoptions are attempted, we should first recheck for deletion - // with an uncached quorum read sometime after listing services (see #42639). - canAdoptFunc := RecheckDeletionTimestamp(func() (metav1.Object, error) { - fresh, err := jc.Controller.GetJobFromInformerCache(job.GetNamespace(), job.GetName()) - if err != nil { - return nil, err - } - if fresh.GetUID() != job.GetUID() { - return nil, fmt.Errorf("original Job %v/%v is gone: got uid %v, wanted %v", job.GetNamespace(), job.GetName(), fresh.GetUID(), job.GetUID()) - } - return fresh, nil - }) - cm := control.NewServiceControllerRefManager(jc.ServiceControl, job, selector, jc.Controller.GetAPIGroupVersionKind(), canAdoptFunc) - return cm.ClaimServices(services) -} - -// FilterServicesForReplicaType returns service belong to a replicaType. -func (jc *JobController) FilterServicesForReplicaType(services []*v1.Service, replicaType string) ([]*v1.Service, error) { - return core.FilterServicesForReplicaType(services, replicaType) -} - -// GetServiceSlices returns a slice, which element is the slice of service. -// Assume the return object is serviceSlices, then serviceSlices[i] is an -// array of pointers to services corresponding to Services for replica i. 
-func (jc *JobController) GetServiceSlices(services []*v1.Service, replicas int, logger *log.Entry) [][]*v1.Service { - return core.GetServiceSlices(services, replicas, logger) -} - -// ReconcileServices checks and updates services for each given ReplicaSpec. -// It will requeue the job in case of an error while creating/deleting services. -func (jc *JobController) ReconcileServices( - job metav1.Object, - services []*v1.Service, - rtype apiv1.ReplicaType, - spec *apiv1.ReplicaSpec) error { - - // Convert ReplicaType to lower string. - rt := strings.ToLower(string(rtype)) - replicas := int(*spec.Replicas) - // Get all services for the type rt. - services, err := jc.FilterServicesForReplicaType(services, rt) - if err != nil { - return err - } - - // GetServiceSlices will return enough information here to make decision to add/remove/update resources. - // - // For example, let's assume we have services with replica-index 0, 1, 2 - // If replica is 4, return a slice with size 4. [[0],[1],[2],[]], a svc with replica-index 3 will be created. - // - // If replica is 1, return a slice with size 3. [[0],[1],[2]], svc with replica-index 1 and 2 are out of range and will be deleted. - serviceSlices := jc.GetServiceSlices(services, replicas, commonutil.LoggerForReplica(job, rt)) - - for index, serviceSlice := range serviceSlices { - if len(serviceSlice) > 1 { - commonutil.LoggerForReplica(job, rt).Warningf("We have too many services for %s %d", rtype, index) - } else if len(serviceSlice) == 0 { - commonutil.LoggerForReplica(job, rt).Infof("need to create new service: %s-%d", rtype, index) - err = jc.CreateNewService(job, rtype, spec, strconv.Itoa(index)) - if err != nil { - return err - } - } else { - // Check the status of the current svc. 
- svc := serviceSlice[0] - - // check if the index is in the valid range, if not, we should kill the svc - if index < 0 || index >= replicas { - err = jc.ServiceControl.DeleteService(svc.Namespace, svc.Name, job.(runtime.Object)) - if err != nil { - return err - } - } - } - } - return nil -} - -// GetPortsFromJob gets the ports of job container. Port could be nil, if distributed communication strategy doesn't need and no other ports that need to be exposed. -func (jc *JobController) GetPortsFromJob(spec *apiv1.ReplicaSpec) (map[string]int32, error) { - return core.GetPortsFromJob(spec, jc.Controller.GetDefaultContainerName()) -} - -// CreateNewService creates a new service for the given index and type. -func (jc *JobController) CreateNewService(job metav1.Object, rtype apiv1.ReplicaType, - spec *apiv1.ReplicaSpec, index string) error { - jobKey, err := KeyFunc(job) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get key for job object %#v: %v", job, err)) - return err - } - - rt := strings.ToLower(string(rtype)) - labels := jc.GenLabels(job.GetName()) - utillabels.SetReplicaType(labels, rt) - utillabels.SetReplicaIndexStr(labels, index) - - ports, err := jc.GetPortsFromJob(spec) - if err != nil { - return err - } - - service := &v1.Service{ - Spec: v1.ServiceSpec{ - ClusterIP: "None", - Selector: labels, - Ports: []v1.ServicePort{}, - }, - } - - // Add service ports to headless service - for name, port := range ports { - svcPort := v1.ServicePort{Name: name, Port: port} - service.Spec.Ports = append(service.Spec.Ports, svcPort) - } - - service.Name = GenGeneralName(job.GetName(), rt, index) - service.Labels = labels - // Create OwnerReference. 
- controllerRef := jc.GenOwnerReference(job) - - // Creation is expected when there is no error returned - expectationServicesKey := expectation.GenExpectationServicesKey(jobKey, rt) - jc.Expectations.RaiseExpectations(expectationServicesKey, 1, 0) - - err = jc.ServiceControl.CreateServicesWithControllerRef(job.GetNamespace(), service, job.(runtime.Object), controllerRef) - if err != nil && errors.IsTimeout(err) { - // Service is created but its initialization has timed out. - // If the initialization is successful eventually, the - // controller will observe the creation via the informer. - // If the initialization fails, or if the service keeps - // uninitialized for a long time, the informer will not - // receive any update, and the controller will create a new - // service when the expectation expires. - succeededServiceCreationCount.Inc() - return nil - } else if err != nil { - // Since error occurred(the informer won't observe this service), - // we decrement the expected number of creates - // and wait until next reconciliation - jc.Expectations.CreationObserved(expectationServicesKey) - failedServiceCreationCount.Inc() - return err - } - succeededServiceCreationCount.Inc() - return nil -} diff --git a/pkg/controller.v1/common/service_test.go b/pkg/controller.v1/common/service_test.go deleted file mode 100644 index 0a8ec62a84..0000000000 --- a/pkg/controller.v1/common/service_test.go +++ /dev/null @@ -1,114 +0,0 @@ -package common - -import ( - "testing" - - "github.com/kubeflow/training-operator/pkg/core" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestCalculateServiceSliceSize(t *testing.T) { - type testCase struct { - services []*corev1.Service - replicas int - expectedSize int - } - - services := []*corev1.Service{ - { - ObjectMeta: metav1.ObjectMeta{ - Labels: 
map[string]string{apiv1.ReplicaIndexLabel: "0"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{apiv1.ReplicaIndexLabel: "1"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{apiv1.ReplicaIndexLabel: "2"}, - }, - }, - } - - var testCases = []testCase{ - { - services: services, - replicas: 3, - expectedSize: 3, - }, - { - services: services, - replicas: 4, - expectedSize: 4, - }, - { - services: services, - replicas: 2, - expectedSize: 3, - }, - { - services: append(services, &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{apiv1.ReplicaIndexLabel: "4"}, - }, - }), - replicas: 3, - expectedSize: 5, - }, - } - - for _, tc := range testCases { - result := core.CalculateServiceSliceSize(tc.services, tc.replicas) - assert.Equal(t, tc.expectedSize, result) - } -} - -func TestFilterServicesForReplicaType(t *testing.T) { - services := []*corev1.Service{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "a", - Labels: map[string]string{apiv1.ReplicaTypeLabel: "foo"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "b", - Labels: map[string]string{apiv1.ReplicaTypeLabel: "bar"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "c", - Labels: map[string]string{apiv1.ReplicaTypeLabel: "foo"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "d", - Labels: map[string]string{apiv1.ReplicaTypeLabel: "bar"}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "e", - Labels: map[string]string{ - apiv1.ReplicaTypeLabel: "foo", - }, - }, - }, - } - c := &JobController{} - got, err := c.FilterServicesForReplicaType(services, "foo") - if err != nil { - t.Fatalf("FilterPodsForReplicaType returned error: %v", err) - } - want := []*corev1.Service{services[0], services[2], services[4]} - assert.Equal(t, want, got) -} diff --git a/pkg/controller.v1/common/status.go b/pkg/controller.v1/common/status.go deleted file mode 100644 index e2dfc6907d..0000000000 --- 
a/pkg/controller.v1/common/status.go +++ /dev/null @@ -1,17 +0,0 @@ -package common - -import ( - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/core" - corev1 "k8s.io/api/core/v1" -) - -// initializeReplicaStatuses initializes the ReplicaStatuses for replica. -func initializeReplicaStatuses(jobStatus *apiv1.JobStatus, rtype apiv1.ReplicaType) { - core.InitializeReplicaStatuses(jobStatus, rtype) -} - -// updateJobReplicaStatuses updates the JobReplicaStatuses according to the pod. -func updateJobReplicaStatuses(jobStatus *apiv1.JobStatus, rtype apiv1.ReplicaType, pod *corev1.Pod) { - core.UpdateJobReplicaStatuses(jobStatus, rtype, pod) -} diff --git a/pkg/controller.v1/common/status_test.go b/pkg/controller.v1/common/status_test.go deleted file mode 100644 index 3a7f1a4a49..0000000000 --- a/pkg/controller.v1/common/status_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package common - -import ( - "testing" - "time" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestUpdateJobReplicaStatuses(t *testing.T) { - jobStatus := apiv1.JobStatus{} - initializeReplicaStatuses(&jobStatus, "worker") - _, ok := jobStatus.ReplicaStatuses["worker"] - // assert ReplicaStatus for "worker" exists - assert.True(t, ok) - setStatusForTest(&jobStatus, "worker", 2, 3, 1, 1) - // terminating pod should count as failed. 
- assert.Equal(t, jobStatus.ReplicaStatuses["worker"].Failed, int32(3)) - assert.Equal(t, jobStatus.ReplicaStatuses["worker"].Succeeded, int32(3)) - assert.Equal(t, jobStatus.ReplicaStatuses["worker"].Active, int32(1)) -} - -func setStatusForTest(jobStatus *apiv1.JobStatus, rtype apiv1.ReplicaType, failed, succeeded, active, terminating int32) { - pod := corev1.Pod{ - Status: corev1.PodStatus{}, - } - var i int32 - for i = 0; i < failed; i++ { - pod.Status.Phase = corev1.PodFailed - updateJobReplicaStatuses(jobStatus, rtype, &pod) - } - for i = 0; i < succeeded; i++ { - pod.Status.Phase = corev1.PodSucceeded - updateJobReplicaStatuses(jobStatus, rtype, &pod) - } - for i = 0; i < active; i++ { - pod.Status.Phase = corev1.PodRunning - updateJobReplicaStatuses(jobStatus, rtype, &pod) - } - for i = 0; i < terminating; i++ { - pod.Status.Phase = corev1.PodRunning - deletionTimestamp := metaV1.NewTime(time.Now()) - pod.DeletionTimestamp = &deletionTimestamp - updateJobReplicaStatuses(jobStatus, rtype, &pod) - } -} diff --git a/pkg/controller.v1/common/util.go b/pkg/controller.v1/common/util.go deleted file mode 100644 index a3f23aa12c..0000000000 --- a/pkg/controller.v1/common/util.go +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package common - -import ( - "fmt" - "sort" - "strings" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - log "github.com/sirupsen/logrus" - v1 "k8s.io/api/core/v1" - schedulingv1 "k8s.io/api/scheduling/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// ReplicasPriority is a slice of ReplicaPriority. -type ReplicasPriority []ReplicaPriority - -type ReplicaPriority struct { - priority int32 - - apiv1.ReplicaSpec -} - -func (p ReplicasPriority) Len() int { - return len(p) -} - -func (p ReplicasPriority) Less(i, j int) bool { - return p[i].priority > p[j].priority -} - -func (p ReplicasPriority) Swap(i, j int) { - p[i], p[j] = p[j], p[i] -} - -func GenGeneralName(jobName string, rtype string, index string) string { - n := jobName + "-" + strings.ToLower(rtype) + "-" + index - return strings.Replace(n, "/", "-", -1) -} - -// RecheckDeletionTimestamp returns a CanAdopt() function to recheck deletion. -// -// The CanAdopt() function calls getObject() to fetch the latest value, -// and denies adoption attempts if that object has a non-nil DeletionTimestamp. -func RecheckDeletionTimestamp(getObject func() (metav1.Object, error)) func() error { - return func() error { - obj, err := getObject() - if err != nil { - return fmt.Errorf("can't recheck DeletionTimestamp: %v", err) - } - if obj.GetDeletionTimestamp() != nil { - return fmt.Errorf("%v/%v has just been deleted at %v", obj.GetNamespace(), obj.GetName(), obj.GetDeletionTimestamp()) - } - return nil - } -} - -func MaxInt(x, y int) int { - if x < y { - return y - } - return x -} - -func AddResourceList(list, req, limit v1.ResourceList) { - for name, quantity := range req { - - if value, ok := list[name]; !ok { - list[name] = quantity.DeepCopy() - } else { - value.Add(quantity) - list[name] = value - } - } - - if req != nil { - return - } - - // If Requests is omitted for a container, - // it defaults to Limits if that is explicitly specified. 
- for name, quantity := range limit { - if value, ok := list[name]; !ok { - list[name] = quantity.DeepCopy() - } else { - value.Add(quantity) - list[name] = value - } - } -} - -type PriorityClassGetFunc func(string) (*schedulingv1.PriorityClass, error) - -func CalcPGMinResources(minMember int32, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, pcGetFunc PriorityClassGetFunc) *v1.ResourceList { - var replicasPriority ReplicasPriority - for t, replica := range replicas { - rp := ReplicaPriority{0, *replica} - pc := replica.Template.Spec.PriorityClassName - - priorityClass, err := pcGetFunc(pc) - if err != nil || priorityClass == nil { - log.Warnf("Ignore task %s priority class %s: %v", t, pc, err) - } else { - rp.priority = priorityClass.Value - } - - replicasPriority = append(replicasPriority, rp) - } - - sort.Sort(replicasPriority) - - minAvailableTasksRes := v1.ResourceList{} - podCnt := int32(0) - for _, task := range replicasPriority { - if task.Replicas == nil { - continue - } - - for i := int32(0); i < *task.Replicas; i++ { - if podCnt >= minMember { - break - } - podCnt++ - for _, c := range task.Template.Spec.Containers { - AddResourceList(minAvailableTasksRes, c.Resources.Requests, c.Resources.Limits) - } - } - } - - return &minAvailableTasksRes -} diff --git a/pkg/controller.v1/common/util_test.go b/pkg/controller.v1/common/util_test.go deleted file mode 100644 index 1ef80005e0..0000000000 --- a/pkg/controller.v1/common/util_test.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package common - -import ( - "testing" - - "github.com/stretchr/testify/assert" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestGenGeneralName(t *testing.T) { - tcs := []struct { - index string - key string - replicaType apiv1.ReplicaType - expectedName string - }{ - { - index: "1", - key: "1/2/3/4/5", - replicaType: "worker", - expectedName: "1-2-3-4-5-worker-1", - }, - { - index: "1", - key: "1/2/3/4/5", - replicaType: "WORKER", - expectedName: "1-2-3-4-5-worker-1", - }, - } - - for _, tc := range tcs { - actual := GenGeneralName(tc.key, string(tc.replicaType), tc.index) - if actual != tc.expectedName { - t.Errorf("Expected name %s, got %s", tc.expectedName, actual) - } - } -} - -func TestMaxInt(t *testing.T) { - type testCase struct { - x int - y int - expectedMax int - } - var testCases = []testCase{ - { - x: 10, - y: 20, - expectedMax: 20, - }, - { - x: 20, - y: 10, - expectedMax: 20, - }, - { - x: 5, - y: 5, - expectedMax: 5, - }, - } - - for _, tc := range testCases { - result := MaxInt(tc.x, tc.y) - assert.Equal(t, tc.expectedMax, result) - } -} diff --git a/pkg/controller.v1/control/controller_ref_manager.go b/pkg/controller.v1/control/controller_ref_manager.go deleted file mode 100644 index 952965297a..0000000000 --- a/pkg/controller.v1/control/controller_ref_manager.go +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package control - -import ( - "fmt" - "sync" - - commonutil "github.com/kubeflow/training-operator/pkg/util" - log "github.com/sirupsen/logrus" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime/schema" - utilerrors "k8s.io/apimachinery/pkg/util/errors" -) - -type BaseControllerRefManager struct { - Controller metav1.Object - Selector labels.Selector - - canAdoptErr error - canAdoptOnce sync.Once - CanAdoptFunc func() error -} - -func (m *BaseControllerRefManager) CanAdopt() error { - m.canAdoptOnce.Do(func() { - if m.CanAdoptFunc != nil { - m.canAdoptErr = m.CanAdoptFunc() - } - }) - return m.canAdoptErr -} - -// ClaimObject tries to take ownership of an object for this controller. -// -// It will reconcile the following: -// - Adopt orphans if the match function returns true. -// - Release owned objects if the match function returns false. -// -// A non-nil error is returned if some form of reconciliation was attempted and -// failed. Usually, controllers should try again later in case reconciliation -// is still needed. -// -// If the error is nil, either the reconciliation succeeded, or no -// reconciliation was necessary. The returned boolean indicates whether you now -// own the object. -// -// No reconciliation will be attempted if the controller is being deleted. -func (m *BaseControllerRefManager) ClaimObject(obj metav1.Object, match func(metav1.Object) bool, adopt, release func(metav1.Object) error) (bool, error) { - controllerRef := metav1.GetControllerOf(obj) - if controllerRef != nil { - if controllerRef.UID != m.Controller.GetUID() { - // Owned by someone else. Ignore. - return false, nil - } - if match(obj) { - // We already own it and the selector matches. 
- // Return true (successfully claimed) before checking deletion timestamp. - // We're still allowed to claim things we already own while being deleted - // because doing so requires taking no actions. - return true, nil - } - // Owned by us but selector doesn't match. - // Try to release, unless we're being deleted. - if m.Controller.GetDeletionTimestamp() != nil { - return false, nil - } - if err := release(obj); err != nil { - // If the pod no longer exists, ignore the error. - if errors.IsNotFound(err) { - return false, nil - } - // Either someone else released it, or there was a transient error. - // The controller should requeue and try again if it's still stale. - return false, err - } - // Successfully released. - return false, nil - } - - // It's an orphan. - if m.Controller.GetDeletionTimestamp() != nil || !match(obj) { - // Ignore if we're being deleted or selector doesn't match. - return false, nil - } - if obj.GetDeletionTimestamp() != nil { - // Ignore if the object is being deleted - return false, nil - } - // Selector matches. Try to adopt. - if err := adopt(obj); err != nil { - // If the pod no longer exists, ignore the error. - if errors.IsNotFound(err) { - return false, nil - } - // Either someone else claimed it first, or there was a transient error. - // The controller should requeue and try again if it's still orphaned. - return false, err - } - // Successfully adopted. - return true, nil -} - -type PodControllerRefManager struct { - BaseControllerRefManager - controllerKind schema.GroupVersionKind - podControl PodControlInterface -} - -// NewPodControllerRefManager returns a PodControllerRefManager that exposes -// methods to manage the controllerRef of pods. -// -// The CanAdopt() function can be used to perform a potentially expensive check -// (such as a live GET from the API server) prior to the first adoption. -// It will only be called (at most once) if an adoption is actually attempted. 
-// If CanAdopt() returns a non-nil error, all adoptions will fail. -// -// NOTE: Once CanAdopt() is called, it will not be called again by the same -// -// PodControllerRefManager instance. Create a new instance if it makes -// sense to check CanAdopt() again (e.g. in a different sync pass). -func NewPodControllerRefManager( - podControl PodControlInterface, - controller metav1.Object, - selector labels.Selector, - controllerKind schema.GroupVersionKind, - canAdopt func() error, -) *PodControllerRefManager { - return &PodControllerRefManager{ - BaseControllerRefManager: BaseControllerRefManager{ - Controller: controller, - Selector: selector, - CanAdoptFunc: canAdopt, - }, - controllerKind: controllerKind, - podControl: podControl, - } -} - -// ClaimPods tries to take ownership of a list of Pods. -// -// It will reconcile the following: -// - Adopt orphans if the selector matches. -// - Release owned objects if the selector no longer matches. -// -// Optional: If one or more filters are specified, a Pod will only be claimed if -// all filters return true. -// -// A non-nil error is returned if some form of reconciliation was attempted and -// failed. Usually, controllers should try again later in case reconciliation -// is still needed. -// -// If the error is nil, either the reconciliation succeeded, or no -// reconciliation was necessary. The list of Pods that you now own is returned. -func (m *PodControllerRefManager) ClaimPods(pods []*v1.Pod, filters ...func(*v1.Pod) bool) ([]*v1.Pod, error) { - var claimed []*v1.Pod - var errlist []error - - match := func(obj metav1.Object) bool { - pod := obj.(*v1.Pod) - // Check selector first so filters only run on potentially matching Pods. 
- if !m.Selector.Matches(labels.Set(pod.Labels)) { - return false - } - for _, filter := range filters { - if !filter(pod) { - return false - } - } - return true - } - adopt := func(obj metav1.Object) error { - return m.AdoptPod(obj.(*v1.Pod)) - } - release := func(obj metav1.Object) error { - return m.ReleasePod(obj.(*v1.Pod)) - } - - for _, pod := range pods { - ok, err := m.ClaimObject(pod, match, adopt, release) - if err != nil { - errlist = append(errlist, err) - continue - } - if ok { - claimed = append(claimed, pod) - } - } - return claimed, utilerrors.NewAggregate(errlist) -} - -// AdoptPod sends a patch to take control of the pod. It returns the error if -// the patching fails. -func (m *PodControllerRefManager) AdoptPod(pod *v1.Pod) error { - if err := m.CanAdopt(); err != nil { - return fmt.Errorf("can't adopt Pod %v/%v (%v): %v", pod.Namespace, pod.Name, pod.UID, err) - } - // Note that ValidateOwnerReferences() will reject this patch if another - // OwnerReference exists with controller=true. - addControllerPatch := fmt.Sprintf( - `{"metadata":{"ownerReferences":[{"apiVersion":"%s","kind":"%s","name":"%s","uid":"%s","controller":true,"blockOwnerDeletion":true}],"uid":"%s"}}`, - m.controllerKind.GroupVersion(), m.controllerKind.Kind, - m.Controller.GetName(), m.Controller.GetUID(), pod.UID) - return m.podControl.PatchPod(pod.Namespace, pod.Name, []byte(addControllerPatch)) -} - -// ReleasePod sends a patch to free the pod from the control of the controller. -// It returns the error if the patching fails. 404 and 422 errors are ignored. 
-func (m *PodControllerRefManager) ReleasePod(pod *v1.Pod) error { - log.Infof("patching pod %s_%s to remove its controllerRef to %s/%s:%s", - pod.Namespace, pod.Name, m.controllerKind.GroupVersion(), m.controllerKind.Kind, m.Controller.GetName()) - deleteOwnerRefPatch := fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, m.Controller.GetUID(), pod.UID) - err := m.podControl.PatchPod(pod.Namespace, pod.Name, []byte(deleteOwnerRefPatch)) - if err != nil { - if errors.IsNotFound(err) { - // If the pod no longer exists, ignore it. - return nil - } - if errors.IsInvalid(err) { - // Invalid error will be returned in two cases: 1. the pod - // has no owner reference, 2. the uid of the pod doesn't - // match, which means the pod is deleted and then recreated. - // In both cases, the error can be ignored. - - // TODO: If the pod has owner references, but none of them - // has the owner.UID, server will silently ignore the patch. - // Investigate why. - return nil - } - } - return err -} - -type ServiceControllerRefManager struct { - BaseControllerRefManager - - controllerKind schema.GroupVersionKind - serviceControl ServiceControlInterface -} - -// NewServiceControllerRefManager returns a ServiceControllerRefManager that exposes -// methods to manage the controllerRef of services. -// -// The canAdopt() function can be used to perform a potentially expensive check -// (such as a live GET from the API server) prior to the first adoption. -// It will only be called (at most once) if an adoption is actually attempted. -// If canAdopt() returns a non-nil error, all adoptions will fail. -// -// NOTE: Once canAdopt() is called, it will not be called again by the same -// -// ServiceControllerRefManager instance. Create a new instance if it makes -// sense to check canAdopt() again (e.g. in a different sync pass). 
-func NewServiceControllerRefManager( - serviceControl ServiceControlInterface, - ctr metav1.Object, - selector labels.Selector, - controllerKind schema.GroupVersionKind, - canAdopt func() error, -) *ServiceControllerRefManager { - return &ServiceControllerRefManager{ - BaseControllerRefManager: BaseControllerRefManager{ - Controller: ctr, - Selector: selector, - CanAdoptFunc: canAdopt, - }, - controllerKind: controllerKind, - serviceControl: serviceControl, - } -} - -// ClaimServices tries to take ownership of a list of Services. -// -// It will reconcile the following: -// - Adopt orphans if the selector matches. -// - Release owned objects if the selector no longer matches. -// -// Optional: If one or more filters are specified, a Service will only be claimed if -// all filters return true. -// -// A non-nil error is returned if some form of reconciliation was attempted and -// failed. Usually, controllers should try again later in case reconciliation -// is still needed. -// -// If the error is nil, either the reconciliation succeeded, or no -// reconciliation was necessary. The list of Services that you now own is returned. -func (m *ServiceControllerRefManager) ClaimServices(services []*v1.Service, filters ...func(*v1.Service) bool) ([]*v1.Service, error) { - var claimed []*v1.Service - var errlist []error - - match := func(obj metav1.Object) bool { - service := obj.(*v1.Service) - // Check selector first so filters only run on potentially matching Services. 
- if !m.Selector.Matches(labels.Set(service.Labels)) { - return false - } - for _, filter := range filters { - if !filter(service) { - return false - } - } - return true - } - adopt := func(obj metav1.Object) error { - return m.AdoptService(obj.(*v1.Service)) - } - release := func(obj metav1.Object) error { - return m.ReleaseService(obj.(*v1.Service)) - } - - for _, service := range services { - ok, err := m.ClaimObject(service, match, adopt, release) - if err != nil { - errlist = append(errlist, err) - continue - } - if ok { - claimed = append(claimed, service) - } - } - return claimed, utilerrors.NewAggregate(errlist) -} - -// AdoptService sends a patch to take control of the service. It returns the error if -// the patching fails. -func (m *ServiceControllerRefManager) AdoptService(service *v1.Service) error { - if err := m.CanAdopt(); err != nil { - return fmt.Errorf("can't adopt Service %v/%v (%v): %v", service.Namespace, service.Name, service.UID, err) - } - // Note that ValidateOwnerReferences() will reject this patch if another - // OwnerReference exists with controller=true. - addControllerPatch := fmt.Sprintf( - `{"metadata":{"ownerReferences":[{"apiVersion":"%s","kind":"%s","name":"%s","uid":"%s","controller":true,"blockOwnerDeletion":true}],"uid":"%s"}}`, - m.controllerKind.GroupVersion(), m.controllerKind.Kind, - m.Controller.GetName(), m.Controller.GetUID(), service.UID) - return m.serviceControl.PatchService(service.Namespace, service.Name, []byte(addControllerPatch)) -} - -// ReleaseService sends a patch to free the service from the control of the controller. -// It returns the error if the patching fails. 404 and 422 errors are ignored. 
-func (m *ServiceControllerRefManager) ReleaseService(service *v1.Service) error { - logger := commonutil.LoggerForService(service, m.controllerKind.Kind) - logger.Infof("patching service %s_%s to remove its controllerRef to %s/%s:%s", - service.Namespace, service.Name, m.controllerKind.GroupVersion(), m.controllerKind.Kind, m.Controller.GetName()) - deleteOwnerRefPatch := fmt.Sprintf(`{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"%s"}],"uid":"%s"}}`, m.Controller.GetUID(), service.UID) - err := m.serviceControl.PatchService(service.Namespace, service.Name, []byte(deleteOwnerRefPatch)) - if err != nil { - if errors.IsNotFound(err) { - // If the service no longer exists, ignore it. - return nil - } - if errors.IsInvalid(err) { - // Invalid error will be returned in two cases: 1. the service - // has no owner reference, 2. the uid of the service doesn't - // match, which means the service is deleted and then recreated. - // In both cases, the error can be ignored. - - // TODO: If the service has owner references, but none of them - // has the owner.UID, server will silently ignore the patch. - // Investigate why. - return nil - } - } - return err -} diff --git a/pkg/controller.v1/control/controller_ref_manager_test.go b/pkg/controller.v1/control/controller_ref_manager_test.go deleted file mode 100644 index b8ad90ac86..0000000000 --- a/pkg/controller.v1/control/controller_ref_manager_test.go +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package control - -import ( - testutilv1 "github.com/kubeflow/training-operator/test_job/test_util/v1" - "reflect" - "testing" - - "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - - testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" -) - -func TestClaimPods(t *testing.T) { - controllerUID := "123" - - type test struct { - name string - manager *PodControllerRefManager - pods []*v1.Pod - claimed []*v1.Pod - } - var tests = []test{ - func() test { - testJob := testutilv1.NewTestJob(1) - testJobLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(testJob.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - testPod := testutilv1.NewBasePod("pod2", testJob, nil) - testPod.Labels[testutilv1.LabelGroupName] = "testing" - - return test{ - name: "Claim pods with correct label", - manager: NewPodControllerRefManager(&FakePodControl{}, - testJob, - testJobLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - pods: []*v1.Pod{testutilv1.NewBasePod("pod1", testJob, t), testPod}, - claimed: []*v1.Pod{testutilv1.NewBasePod("pod1", testJob, t)}, - } - }(), - func() test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller.UID = types.UID(controllerUID) - now := metav1.Now() - controller.DeletionTimestamp = &now - testPod1 := testutilv1.NewBasePod("pod1", controller, t) - testPod1.SetOwnerReferences([]metav1.OwnerReference{}) - testPod2 := testutilv1.NewBasePod("pod2", controller, t) - testPod2.SetOwnerReferences([]metav1.OwnerReference{}) - return test{ - name: "Controller 
marked for deletion can not claim pods", - manager: NewPodControllerRefManager(&FakePodControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - pods: []*v1.Pod{testPod1, testPod2}, - claimed: nil, - } - }(), - func() test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller.UID = types.UID(controllerUID) - now := metav1.Now() - controller.DeletionTimestamp = &now - testPod2 := testutilv1.NewBasePod("pod2", controller, t) - testPod2.SetOwnerReferences([]metav1.OwnerReference{}) - return test{ - name: "Controller marked for deletion can not claim new pods", - manager: NewPodControllerRefManager(&FakePodControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - pods: []*v1.Pod{testutilv1.NewBasePod("pod1", controller, t), testPod2}, - claimed: []*v1.Pod{testutilv1.NewBasePod("pod1", controller, t)}, - } - }(), - func() test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller2 := testutilv1.NewTestJob(1) - controller.UID = types.UID(controllerUID) - controller2.UID = types.UID("AAAAA") - return test{ - name: "Controller can not claim pods owned by another controller", - manager: NewPodControllerRefManager(&FakePodControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - pods: []*v1.Pod{testutilv1.NewBasePod("pod1", controller, t), testutilv1.NewBasePod("pod2", controller2, t)}, - claimed: []*v1.Pod{testutilv1.NewBasePod("pod1", controller, t)}, - } - }(), - func() 
test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller.UID = types.UID(controllerUID) - testPod2 := testutilv1.NewBasePod("pod2", controller, t) - testPod2.Labels[testutilv1.LabelGroupName] = "testing" - return test{ - name: "Controller releases claimed pods when selector doesn't match", - manager: NewPodControllerRefManager(&FakePodControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - pods: []*v1.Pod{testutilv1.NewBasePod("pod1", controller, t), testPod2}, - claimed: []*v1.Pod{testutilv1.NewBasePod("pod1", controller, t)}, - } - }(), - func() test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller.UID = types.UID(controllerUID) - testPod1 := testutilv1.NewBasePod("pod1", controller, t) - testPod2 := testutilv1.NewBasePod("pod2", controller, t) - testPod2.Labels[testutilv1.LabelGroupName] = "testing" - now := metav1.Now() - testPod1.DeletionTimestamp = &now - testPod2.DeletionTimestamp = &now - - return test{ - name: "Controller does not claim orphaned pods marked for deletion", - manager: NewPodControllerRefManager(&FakePodControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - pods: []*v1.Pod{testPod1, testPod2}, - claimed: []*v1.Pod{testPod1}, - } - }(), - } - for _, test := range tests { - claimed, err := test.manager.ClaimPods(test.pods) - if err != nil { - t.Errorf("Test case `%s`, unexpected error: %v", test.name, err) - } else if !reflect.DeepEqual(test.claimed, claimed) { - t.Errorf("Test case `%s`, 
claimed wrong pods. Expected %v, got %v", test.name, podToStringSlice(test.claimed), podToStringSlice(claimed)) - } - - } -} - -func podToStringSlice(pods []*v1.Pod) []string { - var names []string - for _, pod := range pods { - names = append(names, pod.Name) - } - return names -} - -func TestClaimServices(t *testing.T) { - controllerUID := "123" - - type test struct { - name string - manager *ServiceControllerRefManager - services []*v1.Service - claimed []*v1.Service - } - var tests = []test{ - func() test { - testJob := testutilv1.NewTestJob(1) - testJobLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(testJob.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - testService := testutilv1.NewBaseService("service2", testJob, nil) - testService.Labels[testutilv1.LabelGroupName] = "testing" - - return test{ - name: "Claim services with correct label", - manager: NewServiceControllerRefManager(&FakeServiceControl{}, - testJob, - testJobLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - services: []*v1.Service{testutilv1.NewBaseService("service1", testJob, t), testService}, - claimed: []*v1.Service{testutilv1.NewBaseService("service1", testJob, t)}, - } - }(), - func() test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller.UID = types.UID(controllerUID) - now := metav1.Now() - controller.DeletionTimestamp = &now - testService1 := testutilv1.NewBaseService("service1", controller, t) - testService1.SetOwnerReferences([]metav1.OwnerReference{}) - testService2 := testutilv1.NewBaseService("service2", controller, t) - testService2.SetOwnerReferences([]metav1.OwnerReference{}) - return test{ - name: "Controller marked for deletion can not claim 
services", - manager: NewServiceControllerRefManager(&FakeServiceControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - services: []*v1.Service{testService1, testService2}, - claimed: nil, - } - }(), - func() test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller.UID = types.UID(controllerUID) - now := metav1.Now() - controller.DeletionTimestamp = &now - testService2 := testutilv1.NewBaseService("service2", controller, t) - testService2.SetOwnerReferences([]metav1.OwnerReference{}) - return test{ - name: "Controller marked for deletion can not claim new services", - manager: NewServiceControllerRefManager(&FakeServiceControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - services: []*v1.Service{testutilv1.NewBaseService("service1", controller, t), testService2}, - claimed: []*v1.Service{testutilv1.NewBaseService("service1", controller, t)}, - } - }(), - func() test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller2 := testutilv1.NewTestJob(1) - controller.UID = types.UID(controllerUID) - controller2.UID = types.UID("AAAAA") - return test{ - name: "Controller can not claim services owned by another controller", - manager: NewServiceControllerRefManager(&FakeServiceControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - services: []*v1.Service{testutilv1.NewBaseService("service1", controller, t), testutilv1.NewBaseService("service2", controller2, 
t)}, - claimed: []*v1.Service{testutilv1.NewBaseService("service1", controller, t)}, - } - }(), - func() test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller.UID = types.UID(controllerUID) - testService2 := testutilv1.NewBaseService("service2", controller, t) - testService2.Labels[testutilv1.LabelGroupName] = "testing" - return test{ - name: "Controller releases claimed services when selector doesn't match", - manager: NewServiceControllerRefManager(&FakeServiceControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - services: []*v1.Service{testutilv1.NewBaseService("service1", controller, t), testService2}, - claimed: []*v1.Service{testutilv1.NewBaseService("service1", controller, t)}, - } - }(), - func() test { - controller := testutilv1.NewTestJob(1) - controllerLabelSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: testutilv1.GenLabels(controller.Name), - }) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - controller.UID = types.UID(controllerUID) - testService1 := testutilv1.NewBaseService("service1", controller, t) - testService2 := testutilv1.NewBaseService("service2", controller, t) - testService2.Labels[testutilv1.LabelGroupName] = "testing" - now := metav1.Now() - testService1.DeletionTimestamp = &now - testService2.DeletionTimestamp = &now - - return test{ - name: "Controller does not claim orphaned services marked for deletion", - manager: NewServiceControllerRefManager(&FakeServiceControl{}, - controller, - controllerLabelSelector, - testjobv1.SchemeGroupVersionKind, - func() error { return nil }), - services: []*v1.Service{testService1, testService2}, - claimed: []*v1.Service{testService1}, - } - }(), - } - for _, test := 
range tests { - claimed, err := test.manager.ClaimServices(test.services) - if err != nil { - t.Errorf("Test case `%s`, unexpected error: %v", test.name, err) - } else if !reflect.DeepEqual(test.claimed, claimed) { - t.Errorf("Test case `%s`, claimed wrong services. Expected %v, got %v", test.name, serviceToStringSlice(test.claimed), serviceToStringSlice(claimed)) - } - - } -} - -func serviceToStringSlice(services []*v1.Service) []string { - var names []string - for _, service := range services { - names = append(names, service.Name) - } - return names -} diff --git a/pkg/controller.v1/control/pod_control.go b/pkg/controller.v1/control/pod_control.go deleted file mode 100644 index 3855ac50b7..0000000000 --- a/pkg/controller.v1/control/pod_control.go +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package control - -import ( - "context" - "fmt" - "sync" - - commonutil "github.com/kubeflow/training-operator/pkg/util" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/record" -) - -// Reasons for pod events -const ( - // FailedCreatePodReason is added in an event and in a job condition - // when a pod for a replica set is failed to be created. - FailedCreatePodReason = "FailedCreatePod" - // SuccessfulCreatePodReason is added in an event when a pod for a job - // is successfully created. - SuccessfulCreatePodReason = "SuccessfulCreatePod" - // FailedDeletePodReason is added in an event and in a job condition - // when a pod for a replica set is failed to be deleted. - FailedDeletePodReason = "FailedDeletePod" - // SuccessfulDeletePodReason is added in an event when a pod for a job - // is successfully deleted. - SuccessfulDeletePodReason = "SuccessfulDeletePod" -) - -// PodControlInterface is an interface that knows how to add or delete pods -// created as an interface to allow testing. -type PodControlInterface interface { - // CreatePods creates new pods according to the spec. - CreatePods(namespace string, template *v1.PodTemplateSpec, object runtime.Object) error - // CreatePodsOnNode creates a new pod according to the spec on the specified node, - // and sets the ControllerRef. - CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error - // CreatePodsWithControllerRef creates new pods according to the spec, and sets object as the pod's controller. 
- CreatePodsWithControllerRef(namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error - // DeletePod deletes the pod identified by podID. - DeletePod(namespace string, podID string, object runtime.Object) error - // PatchPod patches the pod. - PatchPod(namespace, name string, data []byte) error -} - -// RealPodControl is the default implementation of PodControlInterface. -type RealPodControl struct { - KubeClient clientset.Interface - Recorder record.EventRecorder -} - -var _ PodControlInterface = &RealPodControl{} - -func getPodsLabelSet(template *v1.PodTemplateSpec) labels.Set { - desiredLabels := make(labels.Set) - for k, v := range template.Labels { - desiredLabels[k] = v - } - return desiredLabels -} - -func getPodsFinalizers(template *v1.PodTemplateSpec) []string { - desiredFinalizers := make([]string, len(template.Finalizers)) - copy(desiredFinalizers, template.Finalizers) - return desiredFinalizers -} - -func getPodsAnnotationSet(template *v1.PodTemplateSpec) labels.Set { - desiredAnnotations := make(labels.Set) - for k, v := range template.Annotations { - desiredAnnotations[k] = v - } - return desiredAnnotations -} - -func (r RealPodControl) CreatePods(namespace string, template *v1.PodTemplateSpec, object runtime.Object) error { - return r.createPods("", namespace, template, object, nil) -} - -func (r RealPodControl) CreatePodsWithControllerRef(namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference) error { - if err := ValidateControllerRef(controllerRef); err != nil { - return err - } - return r.createPods("", namespace, template, controllerObject, controllerRef) -} - -func (r RealPodControl) CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error { - if err := ValidateControllerRef(controllerRef); err != nil { - return err - } - return 
r.createPods(nodeName, namespace, template, object, controllerRef) -} - -func (r RealPodControl) PatchPod(namespace, name string, data []byte) error { - _, err := r.KubeClient.CoreV1().Pods(namespace).Patch(context.TODO(), name, types.StrategicMergePatchType, data, metav1.PatchOptions{}) - return err -} - -func GetPodFromTemplate(template *v1.PodTemplateSpec, parentObject runtime.Object, controllerRef *metav1.OwnerReference) (*v1.Pod, error) { - desiredLabels := getPodsLabelSet(template) - desiredFinalizers := getPodsFinalizers(template) - desiredAnnotations := getPodsAnnotationSet(template) - - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: desiredLabels, - Annotations: desiredAnnotations, - Name: template.Name, - Finalizers: desiredFinalizers, - }, - } - if controllerRef != nil { - pod.OwnerReferences = append(pod.OwnerReferences, *controllerRef) - } - pod.Spec = *template.Spec.DeepCopy() - return pod, nil -} - -func (r RealPodControl) createPods(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error { - pod, err := GetPodFromTemplate(template, object, controllerRef) - if err != nil { - return err - } - if len(nodeName) != 0 { - pod.Spec.NodeName = nodeName - } - if labels.Set(pod.Labels).AsSelectorPreValidated().Empty() { - return fmt.Errorf("unable to create pods, no labels") - } - logger := commonutil.LoggerForPod(pod, object.GetObjectKind().GroupVersionKind().Kind) - if newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(context.TODO(), pod, metav1.CreateOptions{}); err != nil { - r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err) - return err - } else { - accessor, err := meta.Accessor(object) - if err != nil { - logger.Errorf("parentObject does not have ObjectMeta, %v", err) - return nil - } - logger.Infof("Controller %v created pod %v", accessor.GetName(), newPod.Name) - r.Recorder.Eventf(object, v1.EventTypeNormal, 
SuccessfulCreatePodReason, "Created pod: %v", newPod.Name) - } - return nil -} - -func (r RealPodControl) DeletePod(namespace string, podID string, object runtime.Object) error { - accessor, err := meta.Accessor(object) - if err != nil { - return fmt.Errorf("object does not have ObjectMeta, %v", err) - } - logger := commonutil.LoggerForJob(accessor) - pod, err := r.KubeClient.CoreV1().Pods(namespace).Get(context.TODO(), podID, metav1.GetOptions{}) - if err != nil { - if errors.IsNotFound(err) { - return nil - } - return err - } - if pod.DeletionTimestamp != nil { - logger.Infof("pod %s/%s is terminating, skip deleting", pod.Namespace, pod.Name) - return nil - } - logger.Infof("Controller %v deleting pod %v/%v", accessor.GetName(), namespace, podID) - // delete options - if err := r.KubeClient.CoreV1().Pods(namespace).Delete(context.TODO(), podID, metav1.DeleteOptions{}); err != nil { - r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeletePodReason, "Error deleting: %v", err) - return fmt.Errorf("unable to delete pods: %v", err) - } else { - r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulDeletePodReason, "Deleted pod: %v", podID) - } - return nil -} - -type FakePodControl struct { - sync.Mutex - Templates []v1.PodTemplateSpec - ControllerRefs []metav1.OwnerReference - DeletePodName []string - Patches [][]byte - Err error - CreateLimit int - CreateCallCount int -} - -var _ PodControlInterface = &FakePodControl{} - -func (f *FakePodControl) PatchPod(namespace, name string, data []byte) error { - f.Lock() - defer f.Unlock() - f.Patches = append(f.Patches, data) - if f.Err != nil { - return f.Err - } - return nil -} - -func (f *FakePodControl) CreatePods(namespace string, spec *v1.PodTemplateSpec, object runtime.Object) error { - f.Lock() - defer f.Unlock() - f.CreateCallCount++ - if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit { - return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount) 
- } - f.Templates = append(f.Templates, *spec) - if f.Err != nil { - return f.Err - } - return nil -} - -func (f *FakePodControl) CreatePodsWithControllerRef(namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error { - f.Lock() - defer f.Unlock() - f.CreateCallCount++ - if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit { - return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount) - } - f.Templates = append(f.Templates, *spec) - f.ControllerRefs = append(f.ControllerRefs, *controllerRef) - if f.Err != nil { - return f.Err - } - return nil -} - -func (f *FakePodControl) CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error { - f.Lock() - defer f.Unlock() - f.CreateCallCount++ - if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit { - return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount) - } - f.Templates = append(f.Templates, *template) - f.ControllerRefs = append(f.ControllerRefs, *controllerRef) - if f.Err != nil { - return f.Err - } - return nil -} - -func (f *FakePodControl) DeletePod(namespace string, podID string, object runtime.Object) error { - f.Lock() - defer f.Unlock() - f.DeletePodName = append(f.DeletePodName, podID) - if f.Err != nil { - return f.Err - } - return nil -} - -func (f *FakePodControl) Clear() { - f.Lock() - defer f.Unlock() - f.DeletePodName = []string{} - f.Templates = []v1.PodTemplateSpec{} - f.ControllerRefs = []metav1.OwnerReference{} - f.Patches = [][]byte{} - f.CreateLimit = 0 - f.CreateCallCount = 0 -} diff --git a/pkg/controller.v1/control/pod_control_test.go b/pkg/controller.v1/control/pod_control_test.go deleted file mode 100644 index 6a9590d1f3..0000000000 --- a/pkg/controller.v1/control/pod_control_test.go +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2019 The 
Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package control - -import ( - "encoding/json" - "net/http/httptest" - "testing" - - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - apiequality "k8s.io/apimachinery/pkg/api/equality" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - clientset "k8s.io/client-go/kubernetes" - clientscheme "k8s.io/client-go/kubernetes/scheme" - restclient "k8s.io/client-go/rest" - "k8s.io/client-go/tools/record" - utiltesting "k8s.io/client-go/util/testing" - - testutilv1 "github.com/kubeflow/training-operator/test_job/test_util/v1" -) - -func TestCreatePods(t *testing.T) { - ns := metav1.NamespaceDefault - body := runtime.EncodeOrDie( - clientscheme.Codecs.LegacyCodec(corev1.SchemeGroupVersion), - &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "empty_pod"}}) - fakeHandler := utiltesting.FakeHandler{ - StatusCode: 200, - ResponseBody: body, - } - testServer := httptest.NewServer(&fakeHandler) - defer testServer.Close() - k8sClient := clientset.NewForConfigOrDie(&restclient.Config{Host: testServer.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &corev1.SchemeGroupVersion}}) - - podControl := RealPodControl{ - KubeClient: k8sClient, - Recorder: &record.FakeRecorder{}, - } - - testJob := testutilv1.NewTestJob(1) - - testName := "pod-name" - podTemplate := testutilv1.NewTestReplicaSpecTemplate() - podTemplate.Name = testName - podTemplate.Labels 
= testutilv1.GenLabels(testJob.Name) - podTemplate.SetOwnerReferences([]metav1.OwnerReference{}) - - // Make sure createReplica sends a POST to the apiserver with a pod from the controllers pod template - err := podControl.CreatePods(ns, &podTemplate, testJob) - assert.NoError(t, err, "unexpected error: %v", err) - - expectedPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: testutilv1.GenLabels(testJob.Name), - Name: testName, - }, - Spec: podTemplate.Spec, - } - fakeHandler.ValidateRequest(t, - "/api/v1/namespaces/default/pods", "POST", nil) - var actualPod = &corev1.Pod{} - err = json.Unmarshal([]byte(fakeHandler.RequestBody), actualPod) - assert.NoError(t, err, "unexpected error: %v", err) - assert.True(t, apiequality.Semantic.DeepDerivative(&expectedPod, actualPod), - "Body: %s", fakeHandler.RequestBody) -} diff --git a/pkg/controller.v1/control/podgroup_control.go b/pkg/controller.v1/control/podgroup_control.go deleted file mode 100644 index 7efca29f73..0000000000 --- a/pkg/controller.v1/control/podgroup_control.go +++ /dev/null @@ -1,199 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package control - -import ( - "context" - "fmt" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/klog/v2" - "sigs.k8s.io/controller-runtime/pkg/client" - schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - volcanobatchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1" - volcanov1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" - volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned" -) - -// PodGroupControlInterface is an interface that knows how to add or delete PodGroups -// created as an interface to allow testing. -type PodGroupControlInterface interface { - // NewEmptyPodGroup returns an empty PodGroup. - NewEmptyPodGroup() client.Object - // GetPodGroup gets the PodGroup identified by namespace and name. - GetPodGroup(namespace string, name string) (metav1.Object, error) - // DeletePodGroup deletes the PodGroup identified by namespace and name. - DeletePodGroup(namespace string, name string) error - // UpdatePodGroup updates a PodGroup. - UpdatePodGroup(podGroup client.Object) error - // CreatePodGroup creates a new PodGroup with PodGroup spec fill function. - CreatePodGroup(podGroup client.Object) error - // DelayPodCreationDueToPodGroup determines whether it should delay Pod Creation. - DelayPodCreationDueToPodGroup(pg metav1.Object) bool - // DecoratePodTemplateSpec decorates PodTemplateSpec. - // If the PodTemplateSpec has SchedulerName set, this method will Not override. - DecoratePodTemplateSpec(pts *corev1.PodTemplateSpec, job metav1.Object, rtype string) - // GetSchedulerName returns the name of the gang scheduler. - GetSchedulerName() string -} - -// VolcanoControl is the implementation of PodGroupControlInterface with volcano. 
-type VolcanoControl struct { - Client volcanoclient.Interface -} - -func (v *VolcanoControl) GetSchedulerName() string { - return "volcano" -} - -func (v *VolcanoControl) DecoratePodTemplateSpec(pts *corev1.PodTemplateSpec, job metav1.Object, rtype string) { - if len(pts.Spec.SchedulerName) == 0 { - pts.Spec.SchedulerName = v.GetSchedulerName() - } - if pts.Annotations == nil { - pts.Annotations = make(map[string]string) - } - pts.Annotations[volcanov1beta1.KubeGroupNameAnnotationKey] = job.GetName() - pts.Annotations[volcanobatchv1alpha1.TaskSpecKey] = rtype -} - -// NewVolcanoControl returns a VolcanoControl -func NewVolcanoControl(vci volcanoclient.Interface) PodGroupControlInterface { - return &VolcanoControl{Client: vci} -} - -func (v *VolcanoControl) DelayPodCreationDueToPodGroup(pg metav1.Object) bool { - if pg == nil { - return true - } - volcanoPodGroup := pg.(*volcanov1beta1.PodGroup) - return len(volcanoPodGroup.Status.Phase) == 0 || volcanoPodGroup.Status.Phase == volcanov1beta1.PodGroupPending -} - -func (v *VolcanoControl) NewEmptyPodGroup() client.Object { - return &volcanov1beta1.PodGroup{} -} - -func (v *VolcanoControl) GetPodGroup(namespace string, name string) (metav1.Object, error) { - pg, err := v.Client.SchedulingV1beta1().PodGroups(namespace).Get(context.TODO(), name, metav1.GetOptions{}) - if err != nil { - return nil, err - } - return pg, nil -} - -func (v *VolcanoControl) DeletePodGroup(namespace string, name string) error { - return v.Client.SchedulingV1beta1().PodGroups(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{}) -} - -func (v *VolcanoControl) UpdatePodGroup(podGroup client.Object) error { - pg := podGroup.(*volcanov1beta1.PodGroup) - _, err := v.Client.SchedulingV1beta1().PodGroups(pg.GetNamespace()).Update(context.TODO(), pg, metav1.UpdateOptions{}) - if err != nil { - return fmt.Errorf("unable to update a PodGroup, '%v': %v", klog.KObj(pg), err) - } - return nil -} - -func (v *VolcanoControl) 
CreatePodGroup(podGroup client.Object) error { - pg := podGroup.(*volcanov1beta1.PodGroup) - _, err := v.Client.SchedulingV1beta1().PodGroups(pg.GetNamespace()).Create(context.TODO(), pg, metav1.CreateOptions{}) - if err != nil { - return fmt.Errorf("unable to create PodGroup: %v", err) - } - return nil -} - -var _ PodGroupControlInterface = &VolcanoControl{} - -// SchedulerPluginsControl is the implementation of PodGroupControlInterface with scheduler-plugins. -type SchedulerPluginsControl struct { - Client client.Client - SchedulerName string -} - -func (s *SchedulerPluginsControl) DecoratePodTemplateSpec(pts *corev1.PodTemplateSpec, job metav1.Object, _ string) { - if len(pts.Spec.SchedulerName) == 0 { - pts.Spec.SchedulerName = s.GetSchedulerName() - } - - if pts.Labels == nil { - pts.Labels = make(map[string]string) - } - pts.Labels[schedulerpluginsv1alpha1.PodGroupLabel] = job.GetName() -} - -func (s *SchedulerPluginsControl) GetSchedulerName() string { - return s.SchedulerName -} - -// NewSchedulerPluginsControl returns a SchedulerPluginsControl -func NewSchedulerPluginsControl(c client.Client, schedulerName string) PodGroupControlInterface { - return &SchedulerPluginsControl{Client: c, SchedulerName: schedulerName} -} - -func (s *SchedulerPluginsControl) DelayPodCreationDueToPodGroup(pg metav1.Object) bool { - return false -} - -func (s *SchedulerPluginsControl) NewEmptyPodGroup() client.Object { - return &schedulerpluginsv1alpha1.PodGroup{} -} - -func (s *SchedulerPluginsControl) GetPodGroup(namespace, name string) (metav1.Object, error) { - pg := &schedulerpluginsv1alpha1.PodGroup{} - ctx := context.TODO() - key := types.NamespacedName{ - Namespace: namespace, - Name: name, - } - if err := s.Client.Get(ctx, key, pg); err != nil { - return nil, err - } - return pg, nil -} - -func (s *SchedulerPluginsControl) DeletePodGroup(namespace, name string) error { - ctx := context.TODO() - pg := s.NewEmptyPodGroup() - pg.SetNamespace(namespace) - pg.SetName(name) - 
- return s.Client.Delete(ctx, pg) -} - -func (s *SchedulerPluginsControl) UpdatePodGroup(podGroup client.Object) error { - pg := podGroup.(*schedulerpluginsv1alpha1.PodGroup) - err := s.Client.Update(context.TODO(), pg, &client.UpdateOptions{}) - if err != nil { - return fmt.Errorf("unable to update a PodGroup, '%v': %v", klog.KObj(pg), err) - } - return nil -} - -func (s *SchedulerPluginsControl) CreatePodGroup(podGroup client.Object) error { - pg := podGroup.(*schedulerpluginsv1alpha1.PodGroup) - err := s.Client.Create(context.TODO(), pg, &client.CreateOptions{}) - if err != nil { - return fmt.Errorf("unable to create a PodGroup, '%v': %v", klog.KObj(pg), err) - } - return nil -} - -var _ PodGroupControlInterface = &SchedulerPluginsControl{} diff --git a/pkg/controller.v1/control/service_control.go b/pkg/controller.v1/control/service_control.go deleted file mode 100644 index e306ba7868..0000000000 --- a/pkg/controller.v1/control/service_control.go +++ /dev/null @@ -1,209 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package control - -import ( - "context" - "fmt" - "sync" - - log "github.com/sirupsen/logrus" - "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/record" -) - -const ( - // FailedCreateServiceReason is added in an event and in a job controller condition - // when a service for a job is failed to be created. - FailedCreateServiceReason = "FailedCreateService" - // SuccessfulCreateServiceReason is added in an event when a service for a job - // is successfully created. - SuccessfulCreateServiceReason = "SuccessfulCreateService" - // FailedDeleteServiceReason is added in an event and in a job condition - // when a service for a job is failed to be deleted. - FailedDeleteServiceReason = "FailedDeleteService" - // SuccessfulDeleteServiceReason is added in an event when a service for a job - // is successfully deleted. - SuccessfulDeleteServiceReason = "SuccessfulDeleteService" -) - -// ServiceControlInterface is an interface that knows how to add or delete Services -// created as an interface to allow testing. -type ServiceControlInterface interface { - // CreateServices creates new Services according to the spec. - CreateServices(namespace string, service *v1.Service, object runtime.Object) error - // CreateServicesWithControllerRef creates new services according to the spec, and sets object as the service's controller. - CreateServicesWithControllerRef(namespace string, service *v1.Service, object runtime.Object, controllerRef *metav1.OwnerReference) error - // PatchService patches the service. - PatchService(namespace, name string, data []byte) error - // DeleteService deletes the service identified by serviceID. 
- DeleteService(namespace, serviceID string, object runtime.Object) error -} - -// RealServiceControl is the default implementation of ServiceControlInterface. -type RealServiceControl struct { - KubeClient clientset.Interface - Recorder record.EventRecorder -} - -func (r RealServiceControl) PatchService(namespace, name string, data []byte) error { - _, err := r.KubeClient.CoreV1().Services(namespace).Patch(context.TODO(), name, types.StrategicMergePatchType, data, metav1.PatchOptions{}) - return err -} - -func (r RealServiceControl) CreateServices(namespace string, service *v1.Service, object runtime.Object) error { - return r.createServices(namespace, service, object, nil) -} - -func (r RealServiceControl) CreateServicesWithControllerRef(namespace string, service *v1.Service, controllerObject runtime.Object, controllerRef *metav1.OwnerReference) error { - if err := ValidateControllerRef(controllerRef); err != nil { - return err - } - return r.createServices(namespace, service, controllerObject, controllerRef) -} - -func (r RealServiceControl) createServices(namespace string, service *v1.Service, object runtime.Object, controllerRef *metav1.OwnerReference) error { - if labels.Set(service.Labels).AsSelectorPreValidated().Empty() { - return fmt.Errorf("unable to create Services, no labels") - } - serviceWithOwner, err := GetServiceFromTemplate(service, object, controllerRef) - if err != nil { - r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreateServiceReason, "Error creating: %v", err) - return fmt.Errorf("unable to create services: %v", err) - } - - newService, err := r.KubeClient.CoreV1().Services(namespace).Create(context.TODO(), serviceWithOwner, metav1.CreateOptions{}) - if err != nil { - r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreateServiceReason, "Error creating: %v", err) - return fmt.Errorf("unable to create services: %v", err) - } - - accessor, err := meta.Accessor(object) - if err != nil { - log.Errorf("parentObject does not have 
ObjectMeta, %v", err) - return nil - } - log.Infof("Controller %v created service %v", accessor.GetName(), newService.Name) - r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreateServiceReason, "Created service: %v", newService.Name) - - return nil -} - -// DeleteService deletes the service identified by serviceID. -func (r RealServiceControl) DeleteService(namespace, serviceID string, object runtime.Object) error { - accessor, err := meta.Accessor(object) - if err != nil { - return fmt.Errorf("object does not have ObjectMeta, %v", err) - } - service, err := r.KubeClient.CoreV1().Services(namespace).Get(context.TODO(), serviceID, metav1.GetOptions{}) - if err != nil { - if errors.IsNotFound(err) { - return nil - } - return err - } - if service.DeletionTimestamp != nil { - log.Infof("service %s/%s is terminating, skip deleting", service.Namespace, service.Name) - return nil - } - log.Infof("Controller %v deleting service %v/%v", accessor.GetName(), namespace, serviceID) - if err := r.KubeClient.CoreV1().Services(namespace).Delete(context.TODO(), serviceID, metav1.DeleteOptions{}); err != nil { - r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeleteServiceReason, "Error deleting: %v", err) - return fmt.Errorf("unable to delete service: %v", err) - } else { - r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulDeleteServiceReason, "Deleted service: %v", serviceID) - } - return nil -} - -type FakeServiceControl struct { - sync.Mutex - Templates []v1.Service - ControllerRefs []metav1.OwnerReference - DeleteServiceName []string - Patches [][]byte - Err error - CreateLimit int - CreateCallCount int -} - -var _ ServiceControlInterface = &FakeServiceControl{} - -func (f *FakeServiceControl) PatchService(namespace, name string, data []byte) error { - f.Lock() - defer f.Unlock() - f.Patches = append(f.Patches, data) - if f.Err != nil { - return f.Err - } - return nil -} - -func (f *FakeServiceControl) CreateServices(namespace string, service *v1.Service, 
object runtime.Object) error { - f.Lock() - defer f.Unlock() - f.CreateCallCount++ - if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit { - return fmt.Errorf("not creating service, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount) - } - f.Templates = append(f.Templates, *service) - if f.Err != nil { - return f.Err - } - return nil -} - -func (f *FakeServiceControl) CreateServicesWithControllerRef(namespace string, service *v1.Service, object runtime.Object, controllerRef *metav1.OwnerReference) error { - f.Lock() - defer f.Unlock() - f.CreateCallCount++ - if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit { - return fmt.Errorf("not creating service, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount) - } - f.Templates = append(f.Templates, *service) - f.ControllerRefs = append(f.ControllerRefs, *controllerRef) - if f.Err != nil { - return f.Err - } - return nil -} - -func (f *FakeServiceControl) DeleteService(namespace string, serviceID string, object runtime.Object) error { - f.Lock() - defer f.Unlock() - f.DeleteServiceName = append(f.DeleteServiceName, serviceID) - if f.Err != nil { - return f.Err - } - return nil -} - -func (f *FakeServiceControl) Clear() { - f.Lock() - defer f.Unlock() - f.DeleteServiceName = []string{} - f.Templates = []v1.Service{} - f.ControllerRefs = []metav1.OwnerReference{} - f.Patches = [][]byte{} - f.CreateLimit = 0 - f.CreateCallCount = 0 -} diff --git a/pkg/controller.v1/control/service_control_test.go b/pkg/controller.v1/control/service_control_test.go deleted file mode 100644 index ff8a531d23..0000000000 --- a/pkg/controller.v1/control/service_control_test.go +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package control - -import ( - "encoding/json" - "net/http/httptest" - "testing" - - "github.com/stretchr/testify/assert" - v1 "k8s.io/api/core/v1" - apiequality "k8s.io/apimachinery/pkg/api/equality" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - clientset "k8s.io/client-go/kubernetes" - clientscheme "k8s.io/client-go/kubernetes/scheme" - restclient "k8s.io/client-go/rest" - "k8s.io/client-go/tools/record" - utiltesting "k8s.io/client-go/util/testing" - - testutilv1 "github.com/kubeflow/training-operator/test_job/test_util/v1" -) - -func TestCreateService(t *testing.T) { - ns := metav1.NamespaceDefault - body := runtime.EncodeOrDie( - clientscheme.Codecs.LegacyCodec(v1.SchemeGroupVersion), - &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "empty_service"}}) - fakeHandler := utiltesting.FakeHandler{ - StatusCode: 200, - ResponseBody: body, - } - testServer := httptest.NewServer(&fakeHandler) - defer testServer.Close() - clientset := clientset.NewForConfigOrDie(&restclient.Config{ - Host: testServer.URL, - ContentConfig: restclient.ContentConfig{ - GroupVersion: &v1.SchemeGroupVersion, - }, - }) - - serviceControl := RealServiceControl{ - KubeClient: clientset, - Recorder: &record.FakeRecorder{}, - } - - testJob := testutilv1.NewTestJob(1) - - testName := "service-name" - service := testutilv1.NewBaseService(testName, testJob, t) - service.SetOwnerReferences([]metav1.OwnerReference{}) - - // Make sure createReplica sends a POST to the apiserver with a pod from the controllers pod template - err := 
serviceControl.CreateServices(ns, service, testJob) - assert.NoError(t, err, "unexpected error: %v", err) - - expectedService := v1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Labels: testutilv1.GenLabels(testJob.Name), - Name: testName, - Namespace: ns, - }, - } - fakeHandler.ValidateRequest(t, - "/api/v1/namespaces/default/services", "POST", nil) - var actualService = &v1.Service{} - err = json.Unmarshal([]byte(fakeHandler.RequestBody), actualService) - assert.NoError(t, err, "unexpected error: %v", err) - assert.True(t, apiequality.Semantic.DeepDerivative(&expectedService, actualService), - "Body: %s", fakeHandler.RequestBody) -} - -func TestCreateServicesWithControllerRef(t *testing.T) { - ns := metav1.NamespaceDefault - body := runtime.EncodeOrDie( - clientscheme.Codecs.LegacyCodec(v1.SchemeGroupVersion), - &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "empty_service"}}) - fakeHandler := utiltesting.FakeHandler{ - StatusCode: 200, - ResponseBody: body, - } - testServer := httptest.NewServer(&fakeHandler) - defer testServer.Close() - clientset := clientset.NewForConfigOrDie(&restclient.Config{ - Host: testServer.URL, - ContentConfig: restclient.ContentConfig{ - GroupVersion: &v1.SchemeGroupVersion, - }, - }) - - serviceControl := RealServiceControl{ - KubeClient: clientset, - Recorder: &record.FakeRecorder{}, - } - - testJob := testutilv1.NewTestJob(1) - - testName := "service-name" - service := testutilv1.NewBaseService(testName, testJob, t) - service.SetOwnerReferences([]metav1.OwnerReference{}) - - ownerRef := testutilv1.GenOwnerReference(testJob) - - // Make sure createReplica sends a POST to the apiserver with a pod from the controllers pod template - err := serviceControl.CreateServicesWithControllerRef(ns, service, testJob, ownerRef) - assert.NoError(t, err, "unexpected error: %v", err) - - expectedService := v1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Labels: testutilv1.GenLabels(testJob.Name), - Name: testName, - Namespace: ns, - OwnerReferences: 
[]metav1.OwnerReference{*ownerRef}, - }, - } - fakeHandler.ValidateRequest(t, - "/api/v1/namespaces/default/services", "POST", nil) - var actualService = &v1.Service{} - err = json.Unmarshal([]byte(fakeHandler.RequestBody), actualService) - assert.NoError(t, err, "unexpected error: %v", err) - assert.True(t, apiequality.Semantic.DeepDerivative(&expectedService, actualService), - "Body: %s", fakeHandler.RequestBody) -} diff --git a/pkg/controller.v1/control/utils.go b/pkg/controller.v1/control/utils.go deleted file mode 100644 index 62b910d357..0000000000 --- a/pkg/controller.v1/control/utils.go +++ /dev/null @@ -1,51 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package control - -import ( - "fmt" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -func ValidateControllerRef(controllerRef *metav1.OwnerReference) error { - if controllerRef == nil { - return fmt.Errorf("controllerRef is nil") - } - if len(controllerRef.APIVersion) == 0 { - return fmt.Errorf("controllerRef has empty APIVersion") - } - if len(controllerRef.Kind) == 0 { - return fmt.Errorf("controllerRef has empty Kind") - } - if controllerRef.Controller == nil || !*controllerRef.Controller { - return fmt.Errorf("controllerRef.Controller is not set to true") - } - if controllerRef.BlockOwnerDeletion == nil || !*controllerRef.BlockOwnerDeletion { - return fmt.Errorf("controllerRef.BlockOwnerDeletion is not set") - } - return nil -} - -func GetServiceFromTemplate(template *v1.Service, parentObject runtime.Object, controllerRef *metav1.OwnerReference) (*v1.Service, error) { - service := template.DeepCopy() - if controllerRef != nil { - service.OwnerReferences = append(service.OwnerReferences, *controllerRef) - } - return service, nil -} diff --git a/pkg/controller.v1/expectation/expectation.go b/pkg/controller.v1/expectation/expectation.go deleted file mode 100644 index 83d5ce3d64..0000000000 --- a/pkg/controller.v1/expectation/expectation.go +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package expectation - -import ( - "fmt" - "sync/atomic" - "time" - - log "github.com/sirupsen/logrus" - - "k8s.io/client-go/tools/cache" - "k8s.io/utils/clock" -) - -const ( - // If a watch drops a delete event for a pod, it'll take this long - // before a dormant controller waiting for those packets is woken up anyway. It is - // specifically targeted at the case where some problem prevents an update - // of expectations, without it the controller could stay asleep forever. This should - // be set based on the expected latency of watch events. - // - // Currently a controller can service (create *and* observe the watch events for said - // creation) about 10 pods a second, so it takes about 1 min to service - // 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s - // latency/pod at the scale of 3000 pods over 100 nodes. - ExpectationsTimeout = 5 * time.Minute -) - -// Expectations are a way for controllers to tell the controller manager what they expect. 
eg: -// ControllerExpectations: { -// controller1: expects 2 adds in 2 minutes -// controller2: expects 2 dels in 2 minutes -// controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met -// } -// -// Implementation: -// ControlleeExpectation = pair of atomic counters to track controllee's creation/deletion -// ControllerExpectationsStore = TTLStore + a ControlleeExpectation per controller -// -// * Once set expectations can only be lowered -// * A controller isn't synced till its expectations are either fulfilled, or expire -// * Controllers that don't set expectations will get woken up for every matching controllee - -// ExpKeyFunc to parse out the key from a ControlleeExpectation -var ExpKeyFunc = func(obj interface{}) (string, error) { - if e, ok := obj.(*ControlleeExpectations); ok { - return e.key, nil - } - return "", fmt.Errorf("could not find key for obj %#v", obj) -} - -// ControllerExpectationsInterface is an interface that allows users to set and wait on expectations. -// Only abstracted out for testing. -// Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different -// types of controllers, because the keys might conflict across types. -type ControllerExpectationsInterface interface { - GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) - SatisfiedExpectations(controllerKey string) bool - DeleteExpectations(controllerKey string) - SetExpectations(controllerKey string, add, del int) error - ExpectCreations(controllerKey string, adds int) error - ExpectDeletions(controllerKey string, dels int) error - CreationObserved(controllerKey string) - DeletionObserved(controllerKey string) - RaiseExpectations(controllerKey string, add, del int) - LowerExpectations(controllerKey string, add, del int) -} - -// ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync. 
-type ControllerExpectations struct { - cache.Store -} - -// GetExpectations returns the ControlleeExpectations of the given controller. -func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) { - exp, exists, err := r.GetByKey(controllerKey) - if err == nil && exists { - return exp.(*ControlleeExpectations), true, nil - } - return nil, false, err -} - -// DeleteExpectations deletes the expectations of the given controller from the TTLStore. -func (r *ControllerExpectations) DeleteExpectations(controllerKey string) { - if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists { - if err := r.Delete(exp); err != nil { - log.Debugf("Error deleting expectations for controller %v: %v", controllerKey, err) - } - } -} - -// SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed. -// Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller -// manager. -func (r *ControllerExpectations) SatisfiedExpectations(controllerKey string) bool { - if exp, exists, err := r.GetExpectations(controllerKey); exists { - if exp.Fulfilled() { - log.Debugf("Controller expectations fulfilled %#v", exp) - return true - } else if exp.isExpired() { - log.Debugf("Controller expectations expired %#v", exp) - return true - } else { - log.Debugf("Controller still waiting on expectations %#v", exp) - return false - } - } else if err != nil { - log.Debugf("Error encountered while checking expectations %#v, forcing sync", err) - } else { - // When a new controller is created, it doesn't have expectations. - // When it doesn't see expected watch events for > TTL, the expectations expire. - // - In this case it wakes up, creates/deletes controllees, and sets expectations again. - // When it has satisfied expectations and no controllees need to be created/destroyed > TTL, the expectations expire. 
- // - In this case it continues without setting expectations till it needs to create/delete controllees. - log.Debugf("Controller %v either never recorded expectations, or the ttl expired.", controllerKey) - } - // Trigger a sync if we either encountered and error (which shouldn't happen since we're - // getting from local store) or this controller hasn't established expectations. - return true -} - -// TODO: Extend ExpirationCache to support explicit expiration. -// TODO: Make this possible to disable in tests. -// TODO: Support injection of clock. -func (exp *ControlleeExpectations) isExpired() bool { - return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout -} - -// SetExpectations registers new expectations for the given controller. Forgets existing expectations. -func (r *ControllerExpectations) SetExpectations(controllerKey string, add, del int) error { - exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()} - log.Debugf("Setting expectations %#v", exp) - return r.Add(exp) -} - -func (r *ControllerExpectations) ExpectCreations(controllerKey string, adds int) error { - return r.SetExpectations(controllerKey, adds, 0) -} - -func (r *ControllerExpectations) ExpectDeletions(controllerKey string, dels int) error { - return r.SetExpectations(controllerKey, 0, dels) -} - -// Decrements the expectation counts of the given controller. -func (r *ControllerExpectations) LowerExpectations(controllerKey string, add, del int) { - if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists { - exp.Add(int64(-add), int64(-del)) - // The expectations might've been modified since the update on the previous line. - log.Debugf("Lowered expectations %#v", exp) - } -} - -// Increments the expectation counts of the given controller. 
-func (r *ControllerExpectations) RaiseExpectations(controllerKey string, add, del int) { - if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists { - exp.Add(int64(add), int64(del)) - // The expectations might've been modified since the update on the previous line. - log.Debugf("Raised expectations %#v", exp) - } -} - -// CreationObserved atomically decrements the `add` expectation count of the given controller. -func (r *ControllerExpectations) CreationObserved(controllerKey string) { - r.LowerExpectations(controllerKey, 1, 0) -} - -// DeletionObserved atomically decrements the `del` expectation count of the given controller. -func (r *ControllerExpectations) DeletionObserved(controllerKey string) { - r.LowerExpectations(controllerKey, 0, 1) -} - -// Expectations are either fulfilled, or expire naturally. -type Expectations interface { - Fulfilled() bool -} - -// ControlleeExpectations track controllee creates/deletes. -type ControlleeExpectations struct { - // Important: Since these two int64 fields are using sync/atomic, they have to be at the top of the struct due to a bug on 32-bit platforms - // See: https://golang.org/pkg/sync/atomic/ for more information - add int64 - del int64 - key string - timestamp time.Time -} - -// Add increments the add and del counters. -func (e *ControlleeExpectations) Add(add, del int64) { - atomic.AddInt64(&e.add, add) - atomic.AddInt64(&e.del, del) -} - -// Fulfilled returns true if this expectation has been fulfilled. -func (e *ControlleeExpectations) Fulfilled() bool { - // TODO: think about why this line being atomic doesn't matter - return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0 -} - -// GetExpectations returns the add and del expectations of the controllee. -func (e *ControlleeExpectations) GetExpectations() (int64, int64) { - return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del) -} - -// NewControllerExpectations returns a store for ControllerExpectations. 
-func NewControllerExpectations() *ControllerExpectations { - return &ControllerExpectations{cache.NewStore(ExpKeyFunc)} -} diff --git a/pkg/controller.v1/expectation/expectation_test.go b/pkg/controller.v1/expectation/expectation_test.go deleted file mode 100644 index c2c91640b3..0000000000 --- a/pkg/controller.v1/expectation/expectation_test.go +++ /dev/null @@ -1,152 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package expectation - -import ( - "sync" - "testing" - "time" - - "github.com/stretchr/testify/assert" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/client-go/tools/cache" - clock "k8s.io/utils/clock/testing" -) - -var ( - // KeyFunc is the short name to DeletionHandlingMetaNamespaceKeyFunc. - // IndexerInformer uses a delta queue, therefore for deletes we have to use this - // key function but it should be just fine for non delete events. - KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc -) - -// NewFakeControllerExpectationsLookup creates a fake store for PodExpectations. 
-func NewFakeControllerExpectationsLookup(ttl time.Duration) (*ControllerExpectations, *clock.FakeClock) { - fakeTime := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC) - fakeClock := clock.NewFakeClock(fakeTime) - ttlPolicy := &cache.TTLPolicy{TTL: ttl, Clock: fakeClock} - ttlStore := cache.NewFakeExpirationStore( - ExpKeyFunc, nil, ttlPolicy, fakeClock) - return &ControllerExpectations{ttlStore}, fakeClock -} - -func newReplicationController(replicas int) *v1.ReplicationController { - rc := &v1.ReplicationController{ - TypeMeta: metav1.TypeMeta{APIVersion: "v1"}, - ObjectMeta: metav1.ObjectMeta{ - UID: uuid.NewUUID(), - Name: "foobar", - Namespace: metav1.NamespaceDefault, - ResourceVersion: "18", - }, - Spec: v1.ReplicationControllerSpec{ - Replicas: func() *int32 { i := int32(replicas); return &i }(), - Selector: map[string]string{"foo": "bar"}, - Template: &v1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "name": "foo", - "type": "production", - }, - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Image: "foo/bar", - TerminationMessagePath: v1.TerminationMessagePathDefault, - ImagePullPolicy: v1.PullIfNotPresent, - }, - }, - RestartPolicy: v1.RestartPolicyAlways, - DNSPolicy: v1.DNSDefault, - NodeSelector: map[string]string{ - "baz": "blah", - }, - }, - }, - }, - } - return rc -} - -func TestControllerExpectations(t *testing.T) { - ttl := 30 * time.Second - e, fakeClock := NewFakeControllerExpectationsLookup(ttl) - // In practice we can't really have add and delete expectations since we only either create or - // delete replicas in one rc pass, and the rc goes to sleep soon after until the expectations are - // either fulfilled or timeout. 
- adds, dels := 10, 30 - rc := newReplicationController(1) - - // RC fires off adds and deletes at apiserver, then sets expectations - rcKey, err := KeyFunc(rc) - assert.NoError(t, err, "Couldn't get key for object %#v: %v", rc, err) - - err = e.SetExpectations(rcKey, adds, dels) - assert.NoError(t, err, "Could not register expectations for rc, err: %v", err) - var wg sync.WaitGroup - for i := 0; i < adds+1; i++ { - wg.Add(1) - go func() { - // In prod this can happen either because of a failed create by the rc - // or after having observed a create via informer - e.CreationObserved(rcKey) - wg.Done() - }() - } - wg.Wait() - - // There are still delete expectations - assert.False(t, e.SatisfiedExpectations(rcKey), "Rc will sync before expectations are met") - - for i := 0; i < dels+1; i++ { - wg.Add(1) - go func() { - e.DeletionObserved(rcKey) - wg.Done() - }() - } - wg.Wait() - - // Expectations have been surpassed - podExp, exists, err := e.GetExpectations(rcKey) - assert.NoError(t, err, "Could not get expectations for rc, exists %v and err %v", exists, err) - assert.True(t, exists, "Could not get expectations for rc, exists %v and err %v", exists, err) - - add, del := podExp.GetExpectations() - assert.Equal(t, int64(-1), add, "Unexpected pod expectations %#v", podExp) - assert.Equal(t, int64(-1), del, "Unexpected pod expectations %#v", podExp) - assert.True(t, e.SatisfiedExpectations(rcKey), "Expectations are met but the rc will not sync") - - // Next round of rc sync, old expectations are cleared - err = e.SetExpectations(rcKey, 1, 2) - assert.NoError(t, err, "Could not register expectations for rc, err %v", err) - podExp, exists, err = e.GetExpectations(rcKey) - assert.NoError(t, err, "Could not get expectations for rc, exists %v and err %v", exists, err) - assert.True(t, exists, "Could not get expectations for rc, exists %v and err %v", exists, err) - add, del = podExp.GetExpectations() - - assert.Equal(t, int64(1), add, "Unexpected pod expectations %#v", 
podExp) - assert.Equal(t, int64(2), del, "Unexpected pod expectations %#v", podExp) - - // Expectations have expired because of ttl - fakeClock.Step(ttl + 1) - assert.True(t, e.SatisfiedExpectations(rcKey), - "Expectations should have expired but didn't") -} diff --git a/pkg/controller.v1/expectation/util.go b/pkg/controller.v1/expectation/util.go deleted file mode 100644 index b7e5bcb4d3..0000000000 --- a/pkg/controller.v1/expectation/util.go +++ /dev/null @@ -1,15 +0,0 @@ -package expectation - -import ( - "strings" -) - -// GenExpectationPodsKey generates an expectation key for pods of a job -func GenExpectationPodsKey(jobKey string, replicaType string) string { - return jobKey + "/" + strings.ToLower(replicaType) + "/pods" -} - -// GenExpectationPodsKey generates an expectation key for services of a job -func GenExpectationServicesKey(jobKey string, replicaType string) string { - return jobKey + "/" + strings.ToLower(replicaType) + "/services" -} diff --git a/pkg/controller.v1/jax/envvar.go b/pkg/controller.v1/jax/envvar.go deleted file mode 100644 index 469933eeb3..0000000000 --- a/pkg/controller.v1/jax/envvar.go +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -package jax - -import ( - "errors" - "strconv" - "strings" - - corev1 "k8s.io/api/core/v1" - "k8s.io/utils/ptr" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -var ( - errorDefaulContainerPortNotExposed = errors.New("default container port is not exposed") - errorFailedToRecognizeRank = errors.New("failed to recognize the JAXJob Rank") -) - -type EnvVarGenerator interface { - Generate(job *kubeflowv1.JAXJob) ([]corev1.EnvVar, error) -} - -func setPodEnv(jaxjob *kubeflowv1.JAXJob, podTemplateSpec *corev1.PodTemplateSpec, rtype, index string) error { - - coordinatorAddr := replicaName(jaxjob.Name, kubeflowv1.JAXJobReplicaTypeWorker, 0) - - coordinatorPort, err := getPortFromJAXJob(jaxjob, kubeflowv1.JAXJobReplicaTypeWorker) - if err != nil { - return err - } - - totalReplicas := getTotalReplicas(jaxjob) - - for i := range podTemplateSpec.Spec.Containers { - - rank, err := strconv.Atoi(index) - if err != nil { - return errorFailedToRecognizeRank - } - // Set PYTHONUNBUFFERED to true, to disable output buffering. - // Ref https://stackoverflow.com/questions/59812009/what-is-the-use-of-pythonunbuffered-in-docker-file. 
- podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "PYTHONUNBUFFERED", - Value: "1", - }) - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "COORDINATOR_PORT", - Value: strconv.Itoa(int(coordinatorPort)), - }) - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "COORDINATOR_ADDRESS", - Value: coordinatorAddr, - }) - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "NUM_PROCESSES", - Value: strconv.Itoa(int(totalReplicas)), - }) - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "PROCESS_ID", - Value: strconv.Itoa(rank), - }) - } - - return nil -} - -func getTotalReplicas(job *kubeflowv1.JAXJob) int { - jobReplicas := 0 - for _, r := range job.Spec.JAXReplicaSpecs { - jobReplicas += int(ptr.Deref[int32](r.Replicas, 0)) - } - return jobReplicas -} - -func replicaName(jobName string, rtype kubeflowv1.ReplicaType, index int) string { - n := jobName + "-" + strings.ToLower(string(rtype)) + "-" + strconv.Itoa(index) - return strings.Replace(n, "/", "-", -1) -} - -func getPortFromJAXJob(job *kubeflowv1.JAXJob, rtype kubeflowv1.ReplicaType) (int32, error) { - containers := job.Spec.JAXReplicaSpecs[rtype].Template.Spec.Containers - for _, container := range containers { - if container.Name == kubeflowv1.JAXJobDefaultContainerName { - ports := container.Ports - for _, port := range ports { - if port.Name == kubeflowv1.JAXJobDefaultPortName { - return port.ContainerPort, nil - } - } - } - } - return -1, errorDefaulContainerPortNotExposed -} diff --git a/pkg/controller.v1/jax/envvar_test.go b/pkg/controller.v1/jax/envvar_test.go deleted file mode 100644 index 3b0f0b5691..0000000000 --- a/pkg/controller.v1/jax/envvar_test.go +++ /dev/null @@ -1,138 +0,0 @@ -package jax - 
-import ( - "strconv" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/ptr" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestSetPodEnv(t *testing.T) { - // Define some helper variables/constants for the test cases - validPort := int32(6666) - validIndex := "0" - invalidIndex := "invalid" - - // Define a valid JAXJob structure - validJAXJob := &kubeflowv1.JAXJob{ - ObjectMeta: metav1.ObjectMeta{Name: "test-jaxjob"}, - Spec: kubeflowv1.JAXJobSpec{ - JAXReplicaSpecs: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.JAXJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "jax", - Image: "docker.io/kubeflow/jaxjob-dist-spmd-mnist:latest", - Ports: []corev1.ContainerPort{{ - Name: kubeflowv1.JAXJobDefaultPortName, - ContainerPort: validPort, - }}, - ImagePullPolicy: corev1.PullAlways, - Command: []string{ - "python", - "train.py", - }, - }}, - }, - }, - }, - }, - }, - } - - // Define the test cases - cases := map[string]struct { - jaxJob *kubeflowv1.JAXJob - podTemplate *corev1.PodTemplateSpec - rtype kubeflowv1.ReplicaType - index string - wantPodEnvVars []corev1.EnvVar - wantErr error - }{ - "successful environment variable setup": { - jaxJob: validJAXJob, - podTemplate: &corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{}}, - }, - }, - rtype: kubeflowv1.JAXJobReplicaTypeWorker, - index: validIndex, - wantPodEnvVars: []corev1.EnvVar{ - {Name: "PYTHONUNBUFFERED", Value: "1"}, - {Name: "COORDINATOR_PORT", Value: strconv.Itoa(int(validPort))}, - {Name: "COORDINATOR_ADDRESS", Value: "test-jaxjob-worker-0"}, - {Name: "NUM_PROCESSES", Value: "1"}, - {Name: "PROCESS_ID", Value: validIndex}, - }, - wantErr: nil, - }, - "invalid index for PROCESS_ID": { 
- jaxJob: validJAXJob, - podTemplate: &corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{}}, - }, - }, - rtype: kubeflowv1.JAXJobReplicaTypeWorker, - index: invalidIndex, - wantErr: errorFailedToRecognizeRank, - }, - "missing container port in JAXJob": { - jaxJob: &kubeflowv1.JAXJob{ - Spec: kubeflowv1.JAXJobSpec{ - JAXReplicaSpecs: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.JAXJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "jax", - Ports: []corev1.ContainerPort{ - {Name: "wrong-port", ContainerPort: 0}, - }, - }}, - }, - }, - }, - }, - }, - }, - podTemplate: &corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{}}, - }, - }, - rtype: kubeflowv1.JAXJobReplicaTypeWorker, - index: validIndex, - wantErr: errorDefaulContainerPortNotExposed, - }, - } - - // Execute the test cases - for name, tc := range cases { - t.Run(name, func(t *testing.T) { - err := setPodEnv(tc.jaxJob, tc.podTemplate, string(tc.rtype), tc.index) - - // Check if an error was expected - if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); len(diff) != 0 { - t.Errorf("Unexpected error (-want,+got):\n%s", diff) - } - - for i, container := range tc.podTemplate.Spec.Containers { - if diff := cmp.Diff(tc.wantPodEnvVars, container.Env); diff != "" { - t.Errorf("Unexpected env vars for container %d (-want,+got):\n%s", i, diff) - } - } - - }) - } -} diff --git a/pkg/controller.v1/jax/jaxjob_controller.go b/pkg/controller.v1/jax/jaxjob_controller.go deleted file mode 100644 index 5334e1eba6..0000000000 --- a/pkg/controller.v1/jax/jaxjob_controller.go +++ /dev/null @@ -1,478 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package jax - -import ( - "context" - "fmt" - "strings" - "time" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" - "github.com/kubeflow/training-operator/pkg/common/util" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - commonutil "github.com/kubeflow/training-operator/pkg/util" - - "github.com/go-logr/logr" - "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/client-go/informers" - kubeclientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/record" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/source" - schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - 
"volcano.sh/apis/pkg/apis/scheduling/v1beta1" -) - -const ( - controllerName = "jaxjob-controller" -) - -// NewReconciler creates a JAXJob Reconciler -func NewReconciler(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc) *JAXJobReconciler { - r := &JAXJobReconciler{ - client: mgr.GetClient(), - scheme: mgr.GetScheme(), - recorder: mgr.GetEventRecorderFor(controllerName), - apiReader: mgr.GetAPIReader(), - log: ctrl.Log.WithName(controllerName), - } - - // Create clients - cfg := mgr.GetConfig() - kubeClientSet := kubeclientset.NewForConfigOrDie(cfg) - sharedInformers := informers.NewSharedInformerFactory(kubeClientSet, 0) - priorityClassInformer := sharedInformers.Scheduling().V1().PriorityClasses() - - // Initialize common job controller - r.JobController = common.JobController{ - Controller: r, - Expectations: expectation.NewControllerExpectations(), - WorkQueue: &util.FakeWorkQueue[string]{}, - Recorder: r.recorder, - KubeClientSet: kubeClientSet, - PriorityClassLister: priorityClassInformer.Lister(), - PriorityClassInformerSynced: priorityClassInformer.Informer().HasSynced, - PodControl: control.RealPodControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - ServiceControl: control.RealServiceControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - } - - gangSchedulingSetupFunc(&r.JobController) - - return r -} - -// JAXJobReconciler reconciles a JAXJob object -type JAXJobReconciler struct { - common.JobController - client client.Client - scheme *runtime.Scheme - log logr.Logger - recorder record.EventRecorder - apiReader client.Reader -} - -// +kubebuilder:rbac:groups=kubeflow.org,resources=jaxjobs,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=kubeflow.org,resources=jaxjobs/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=kubeflow.org,resources=jaxjobs/finalizers,verbs=update -// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;update;patch;delete -// 
+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;delete -// +kubebuilder:rbac:groups=scheduling.volcano.sh,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=scheduling.x-k8s.io,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch;delete - -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// the JAXJob object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.8.3/pkg/reconcile -func (r *JAXJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - jaxjob := &kubeflowv1.JAXJob{} - err := r.client.Get(ctx, req.NamespacedName, jaxjob) - if err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - - // log := ctrl.LoggerFrom(ctx).WithValues("jaxjob", klog.KObj(&jaxjob)) - // ctrl.LoggerInto(ctx, log) - // log.V(2).Info("Reconciling JAXJob") - - // Check if reconciliation is needed - jobKey, err := common.KeyFunc(jaxjob) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get jobKey for job object %#v: %v", jaxjob, err)) - } - - replicaTypes := util.GetReplicaTypes(jaxjob.Spec.JAXReplicaSpecs) - needReconcile := util.SatisfiedExpectations(r.Expectations, jobKey, replicaTypes) - - if !needReconcile || jaxjob.GetDeletionTimestamp() != nil { - r.log.Info("reconcile cancelled, job does not need to do reconcile or has been deleted", - "sync", needReconcile, "deleted", jaxjob.GetDeletionTimestamp() != nil) - return ctrl.Result{}, nil - } - - // Set default priorities to jax job - r.scheme.Default(jaxjob) - - // Use common to reconcile the job 
related pod and service - err = r.ReconcileJobs(jaxjob, jaxjob.Spec.JAXReplicaSpecs, jaxjob.Status, &jaxjob.Spec.RunPolicy) - if err != nil { - r.log.Error(err, "Reconcile JAXJob error") - return ctrl.Result{}, err - } - t, err := util.DurationUntilExpireTime(&jaxjob.Spec.RunPolicy, jaxjob.Status) - if err != nil { - logrus.Warnf("Reconcile JAXJob error %v", err) - return ctrl.Result{}, err - } - if t >= 0 { - return ctrl.Result{Requeue: true, RequeueAfter: t}, nil - } - - return ctrl.Result{}, nil -} - -// SetupWithManager sets up the controller with the Manager. -func (r *JAXJobReconciler) SetupWithManager(mgr ctrl.Manager, controllerThreads int) error { - c, err := controller.New(r.ControllerName(), mgr, controller.Options{ - Reconciler: r, - MaxConcurrentReconciles: controllerThreads, - }) - if err != nil { - return err - } - // using onOwnerCreateFunc is easier to set defaults - if err = c.Watch(source.Kind[*kubeflowv1.JAXJob](mgr.GetCache(), &kubeflowv1.JAXJob{}, - &handler.TypedEnqueueRequestForObject[*kubeflowv1.JAXJob]{}, - predicate.TypedFuncs[*kubeflowv1.JAXJob]{CreateFunc: r.onOwnerCreateFunc()}), - ); err != nil { - return err - } - // inject watching for job related pod - if err = c.Watch(source.Kind[*corev1.Pod](mgr.GetCache(), &corev1.Pod{}, - handler.TypedEnqueueRequestForOwner[*corev1.Pod](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.JAXJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Pod](r.scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // inject watching for job related service - if err = c.Watch(source.Kind[*corev1.Service](mgr.GetCache(), &corev1.Service{}, - handler.TypedEnqueueRequestForOwner[*corev1.Service](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.JAXJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Service](r.scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // skip watching volcano PodGroup if volcano PodGroup is not 
installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: v1beta1.GroupName, Kind: "PodGroup"}, - v1beta1.SchemeGroupVersion.Version); err == nil { - // inject watching for job related volcano PodGroup - if err = c.Watch(source.Kind[*v1beta1.PodGroup](mgr.GetCache(), &v1beta1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*v1beta1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.JAXJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*v1beta1.PodGroup](r.scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - } - // skip watching scheduler-plugins PodGroup if scheduler-plugins PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: schedulerpluginsv1alpha1.SchemeGroupVersion.Group, Kind: "PodGroup"}, - schedulerpluginsv1alpha1.SchemeGroupVersion.Version); err == nil { - // inject watching for job related scheduler-plugins PodGroup - if err = c.Watch(source.Kind[*schedulerpluginsv1alpha1.PodGroup](mgr.GetCache(), &schedulerpluginsv1alpha1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*schedulerpluginsv1alpha1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.JAXJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*schedulerpluginsv1alpha1.PodGroup](r.scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - } - return nil -} - -func (r *JAXJobReconciler) ControllerName() string { - return controllerName -} - -func (r *JAXJobReconciler) GetAPIGroupVersionKind() schema.GroupVersionKind { - return kubeflowv1.GroupVersion.WithKind(kubeflowv1.JAXJobKind) -} - -func (r *JAXJobReconciler) GetAPIGroupVersion() schema.GroupVersion { - return kubeflowv1.GroupVersion -} - -func (r *JAXJobReconciler) GetGroupNameLabelValue() string { - return kubeflowv1.GroupVersion.Group -} - -func (r *JAXJobReconciler) GetFrameworkName() string { - return kubeflowv1.JAXJobFrameworkName -} - -func (r *JAXJobReconciler) 
GetJobFromInformerCache(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.JAXJob{} - err := r.client.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if errors.IsNotFound(err) { - logrus.Error(err, "jax job not found", "namespace", namespace, "name", name) - } else { - logrus.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -func (r *JAXJobReconciler) GetJobFromAPIClient(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.JAXJob{} - - err := r.apiReader.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if errors.IsNotFound(err) { - logrus.Error(err, "jax job not found", "namespace", namespace, "name", name) - } else { - logrus.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -func (r *JAXJobReconciler) GetPodsForJob(obj interface{}) ([]*corev1.Pod, error) { - job, err := meta.Accessor(obj) - if err != nil { - return nil, err - } - - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. - podlist := &corev1.PodList{} - err = r.client.List(context.Background(), podlist, client.MatchingLabels(r.GenLabels(job.GetName())), client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - return util.JobControlledPodList(podlist.Items, job), nil -} - -func (r *JAXJobReconciler) GetServicesForJob(obj interface{}) ([]*corev1.Service, error) { - job, err := meta.Accessor(obj) - if err != nil { - return nil, err - } - - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. 
- serviceList := &corev1.ServiceList{} - err = r.client.List(context.Background(), serviceList, client.MatchingLabels(r.GenLabels(job.GetName())), client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - ret := util.ConvertServiceList(serviceList.Items) - return ret, nil -} - -func (r *JAXJobReconciler) DeleteJob(job interface{}) error { - jaxjob, ok := job.(*kubeflowv1.JAXJob) - if !ok { - return fmt.Errorf("%+v is not a type of JAXJob", job) - } - if err := r.client.Delete(context.Background(), jaxjob); err != nil { - r.recorder.Eventf(jaxjob, corev1.EventTypeWarning, control.FailedDeletePodReason, "Error deleting: %v", err) - logrus.Error(err, "failed to delete job", "namespace", jaxjob.Namespace, "name", jaxjob.Name) - return err - } - r.recorder.Eventf(jaxjob, corev1.EventTypeNormal, control.SuccessfulDeletePodReason, "Deleted job: %v", jaxjob.Name) - logrus.Info("job deleted", "namespace", jaxjob.Namespace, "name", jaxjob.Name) - trainingoperatorcommon.DeletedJobsCounterInc(jaxjob.Namespace, r.GetFrameworkName()) - return nil -} - -func (r *JAXJobReconciler) GenLabelSelector(jobName string, - rtype kubeflowv1.ReplicaType) *metav1.LabelSelector { - labels := r.GenLabels(jobName) - labels[kubeflowv1.ReplicaTypeLabel] = strings.ToLower(string(rtype)) - - return &metav1.LabelSelector{ - MatchLabels: labels, - } -} - -// UpdateJobStatus updates the job status and job conditions -func (r *JAXJobReconciler) UpdateJobStatus(job interface{}, - replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, - jobStatus *kubeflowv1.JobStatus) error { - jaxjob, ok := job.(*kubeflowv1.JAXJob) - if !ok { - return fmt.Errorf("%+v is not a type of JAXJob", job) - } - jaxjobKey, err := common.KeyFunc(jaxjob) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get key for jaxjob object %#v: %v", jaxjob, err)) - return err - } - - logger := commonutil.LoggerForJob(jaxjob) - - // Set StartTime. 
- if jobStatus.StartTime == nil { - now := metav1.Now() - jobStatus.StartTime = &now - // enqueue a sync to check if job past ActiveDeadlineSeconds - if jaxjob.Spec.RunPolicy.ActiveDeadlineSeconds != nil { - logger.Infof("Job with ActiveDeadlineSeconds will sync after %d seconds", *jaxjob.Spec.RunPolicy.ActiveDeadlineSeconds) - r.WorkQueue.AddAfter(jaxjobKey, time.Duration(*jaxjob.Spec.RunPolicy.ActiveDeadlineSeconds)*time.Second) - } - } - - for rtype, spec := range replicas { - status := jobStatus.ReplicaStatuses[rtype] - // Generate the label selector. - status.Selector = metav1.FormatLabelSelector(r.GenLabelSelector(jaxjob.Name, rtype)) - - succeeded := status.Succeeded - expected := *(spec.Replicas) - succeeded - running := status.Active - failed := status.Failed - specReplicas := *spec.Replicas - - logrus.Infof("JAXJob=%s, ReplicaType=%s expected=%d, running=%d, succeeded=%d, failed=%d, Replicas=%d", - jaxjob.Name, rtype, expected, running, succeeded, failed, specReplicas) - - if rtype == kubeflowv1.JAXJobReplicaTypeWorker { - if expected == 0 { - msg := fmt.Sprintf("JAXJob %s/%s successfully completed.", - jaxjob.Namespace, jaxjob.Name) - r.recorder.Event(jaxjob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobSucceededReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobSucceededReason), msg) - trainingoperatorcommon.SuccessfulJobsCounterInc(jaxjob.Namespace, r.GetFrameworkName()) - } else if running > 0 { - // Some workers are still running, leave a running condition. 
- msg := fmt.Sprintf("JAXJob %s/%s is running.", - jaxjob.Namespace, jaxjob.Name) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobRunningReason), msg) - } - } - - if failed > 0 && (specReplicas > succeeded+running) { - if spec.RestartPolicy != kubeflowv1.RestartPolicyNever { - msg := fmt.Sprintf("JAXJob %s is restarting because %d %s replica(s) failed.", jaxjob.Name, failed, rtype) - r.Recorder.Event(jaxjob, corev1.EventTypeWarning, commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobRestartingReason), msg) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRestarting, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobRestartingReason), msg) - trainingoperatorcommon.RestartedJobsCounterInc(jaxjob.Namespace, r.GetFrameworkName()) - } else { - msg := fmt.Sprintf("JAXJob %s is failed because %d %s replica(s) failed.", jaxjob.Name, failed, rtype) - r.Recorder.Event(jaxjob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobFailedReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobFailed, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobFailedReason), msg) - trainingoperatorcommon.FailedJobsCounterInc(jaxjob.Namespace, r.GetFrameworkName()) - } - } - } - return nil -} - -// UpdateJobStatusInApiServer updates the job status in to cluster. 
-func (r *JAXJobReconciler) UpdateJobStatusInApiServer(job interface{}, jobStatus *kubeflowv1.JobStatus) error { - if jobStatus.ReplicaStatuses == nil { - jobStatus.ReplicaStatuses = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus{} - } - - jaxjob, ok := job.(*kubeflowv1.JAXJob) - trainingoperatorcommon.ClearGeneratedFields(&jaxjob.ObjectMeta) - if !ok { - return fmt.Errorf("%+v is not a type of JAXJob", job) - } - - // Job status passed in differs with status in job, update in basis of the passed in one. - if !equality.Semantic.DeepEqual(&jaxjob.Status, jobStatus) { - jaxjob = jaxjob.DeepCopy() - jaxjob.Status = *jobStatus.DeepCopy() - } - - result := r.client.Status().Update(context.Background(), jaxjob) - - if result != nil { - r.log.WithValues("jaxjob", types.NamespacedName{ - Namespace: jaxjob.GetNamespace(), - Name: jaxjob.GetName(), - }) - return result - } - - return nil -} - -// SetClusterSpec sets the cluster spec and init container for the pod -func (r *JAXJobReconciler) SetClusterSpec(job interface{}, podTemplate *corev1.PodTemplateSpec, rtype, index string) error { - jaxjob, ok := job.(*kubeflowv1.JAXJob) - if !ok { - return fmt.Errorf("%+v is not a type of JAXJob", job) - } - if err := setPodEnv(jaxjob, podTemplate, rtype, index); err != nil { - return err - } - return nil -} - -func (r *JAXJobReconciler) GetDefaultContainerName() string { - return kubeflowv1.JAXJobDefaultContainerName -} - -func (r *JAXJobReconciler) GetDefaultContainerPortName() string { - return kubeflowv1.JAXJobDefaultPortName -} - -func (r *JAXJobReconciler) IsMasterRole(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, - rtype kubeflowv1.ReplicaType, index int) bool { - return index == 0 -} - -// onOwnerCreateFunc modify creation condition. 
-func (r *JAXJobReconciler) onOwnerCreateFunc() func(createEvent event.TypedCreateEvent[*kubeflowv1.JAXJob]) bool { - return func(e event.TypedCreateEvent[*kubeflowv1.JAXJob]) bool { - jaxjob := e.Object - r.scheme.Default(jaxjob) - msg := fmt.Sprintf("JAXJob %s is created.", e.Object.GetName()) - logrus.Info(msg) - trainingoperatorcommon.CreatedJobsCounterInc(jaxjob.Namespace, r.GetFrameworkName()) - commonutil.UpdateJobConditions(&jaxjob.Status, kubeflowv1.JobCreated, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobCreatedReason), msg) - return true - } -} diff --git a/pkg/controller.v1/jax/jaxjob_controller_suite_test.go b/pkg/controller.v1/jax/jaxjob_controller_suite_test.go deleted file mode 100644 index a9471d9c83..0000000000 --- a/pkg/controller.v1/jax/jaxjob_controller_suite_test.go +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package jax - -import ( - "context" - "crypto/tls" - "fmt" - "net" - "path/filepath" - "testing" - "time" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - jaxwebhook "github.com/kubeflow/training-operator/pkg/webhooks/jax" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - "k8s.io/client-go/kubernetes/scheme" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" - //+kubebuilder:scaffold:imports -) - -var ( - testK8sClient client.Client - testEnv *envtest.Environment - testCtx context.Context - testCancel context.CancelFunc -) - -func TestAPIs(t *testing.T) { - RegisterFailHandler(Fail) - - RunSpecs(t, "Controller Suite") -} - -var _ = BeforeSuite(func() { - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - - testCtx, testCancel = context.WithCancel(context.TODO()) - - By("bootstrapping test environment") - testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "manifests", "base", "crds")}, - ErrorIfCRDPathMissing: true, - WebhookInstallOptions: envtest.WebhookInstallOptions{ - Paths: []string{filepath.Join("..", "..", "..", "manifests", "base", "webhook", "manifests.yaml")}, - }, - } - - cfg, err := testEnv.Start() - Expect(err).NotTo(HaveOccurred()) - Expect(cfg).NotTo(BeNil()) - - err = v1beta1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - err = kubeflowv1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - //+kubebuilder:scaffold:scheme - - testK8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) - Expect(err).NotTo(HaveOccurred()) - Expect(testK8sClient).NotTo(BeNil()) - - mgr, err := ctrl.NewManager(cfg, ctrl.Options{ - Metrics: metricsserver.Options{ - BindAddress: "0", - }, - WebhookServer: webhook.NewServer( - webhook.Options{ - Host: testEnv.WebhookInstallOptions.LocalServingHost, - Port: testEnv.WebhookInstallOptions.LocalServingPort, - CertDir: 
testEnv.WebhookInstallOptions.LocalServingCertDir, - }), - }) - Expect(err).NotTo(HaveOccurred()) - - gangSchedulingSetupFunc := common.GenNonGangSchedulerSetupFunc() - r := NewReconciler(mgr, gangSchedulingSetupFunc) - - Expect(r.SetupWithManager(mgr, 1)).NotTo(HaveOccurred()) - Expect(jaxwebhook.SetupWebhook(mgr)).NotTo(HaveOccurred()) - - go func() { - defer GinkgoRecover() - err = mgr.Start(testCtx) - Expect(err).ToNot(HaveOccurred(), "failed to run manager") - }() - - dialer := &net.Dialer{Timeout: time.Second} - addrPort := fmt.Sprintf("%s:%d", testEnv.WebhookInstallOptions.LocalServingHost, testEnv.WebhookInstallOptions.LocalServingPort) - Eventually(func(g Gomega) { - conn, err := tls.DialWithDialer(dialer, "tcp", addrPort, &tls.Config{InsecureSkipVerify: true}) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(conn.Close()).NotTo(HaveOccurred()) - }).Should(Succeed()) -}) - -var _ = AfterSuite(func() { - By("tearing down the test environment") - testCancel() - err := testEnv.Stop() - Expect(err).NotTo(HaveOccurred()) -}) diff --git a/pkg/controller.v1/jax/jaxjob_controller_test.go b/pkg/controller.v1/jax/jaxjob_controller_test.go deleted file mode 100644 index 7a6255aef0..0000000000 --- a/pkg/controller.v1/jax/jaxjob_controller_test.go +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright 2024 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package jax - -import ( - "context" - "fmt" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - commonutil "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -var _ = Describe("JAXJob controller", func() { - // Define utility constants for object names. - const ( - expectedPort = int32(6666) - ) - - Context("When creating the JAXJob", func() { - const name = "test-job" - var ( - ns *corev1.Namespace - job *kubeflowv1.JAXJob - jobKey types.NamespacedName - worker0Key types.NamespacedName - ctx = context.Background() - ) - BeforeEach(func() { - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "jax-test-", - }, - } - Expect(testK8sClient.Create(ctx, ns)).Should(Succeed()) - - job = &kubeflowv1.JAXJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: ns.Name, - }, - } - jobKey = client.ObjectKeyFromObject(job) - - worker0Key = types.NamespacedName{ - Name: fmt.Sprintf("%s-worker-0", name), - Namespace: ns.Name, - } - job.Spec.JAXReplicaSpecs = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.JAXJobReplicaTypeWorker: { - Replicas: ptr.To[int32](2), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Image: "test-image", - Name: kubeflowv1.JAXJobDefaultContainerName, - Ports: []corev1.ContainerPort{ - { - Name: kubeflowv1.JAXJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP, - }, - }, - }, - }, - }, - }, - }, - } - }) - AfterEach(func() { - Expect(testK8sClient.Delete(ctx, job)).Should(Succeed()) - Expect(testK8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - - It("Shouldn't create resources if JAXJob is suspended", func() { - By("By creating a new JAXJob 
with suspend=true") - job.Spec.RunPolicy.Suspend = ptr.To(true) - job.Spec.JAXReplicaSpecs[kubeflowv1.JAXJobReplicaTypeWorker].Replicas = ptr.To[int32](1) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.JAXJob{} - workerPod := &corev1.Pod{} - workerSvc := &corev1.Service{} - - By("Checking created JAXJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - By("Checking created JAXJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the JAXJob has suspended condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.ConsistentDuration, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("JAXJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("JAXJob %s is suspended.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - }) - - It("Should delete resources after JAXJob is suspended; Should resume JAXJob after 
JAXJob is unsuspended", func() { - By("By creating a new JAXJob") - job.Spec.JAXReplicaSpecs[kubeflowv1.JAXJobReplicaTypeWorker].Replicas = ptr.To[int32](1) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.JAXJob{} - workerPod := &corev1.Pod{} - workerSvc := &corev1.Service{} - - // We'll need to retry getting this newly created JAXJob, given that creation may not immediately happen. - By("Checking created JAXJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - var startTimeBeforeSuspended *metav1.Time - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - startTimeBeforeSuspended = created.Status.StartTime - return startTimeBeforeSuspended - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Checking the created pods and services") - Eventually(func() bool { - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Updating the pod's phase with Running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking the JAXJob's condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: 
commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("JAXJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("JAXJob %s/%s is running.", ns.Name, name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Updating the JAXJob with suspend=true") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(true) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the pods and services are removed") - Eventually(func() bool { - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the JAXJob has a suspended condition") - Eventually(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.ReplicaStatuses[kubeflowv1.JAXJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.ReplicaStatuses[kubeflowv1.JAXJobReplicaTypeWorker].Active == 0 && - 
created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - Expect(created.Status.Conditions).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("JAXJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionFalse, - Reason: commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("JAXJob %s is suspended.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("JAXJob %s is suspended.", name), - Status: corev1.ConditionTrue, - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Unsuspending the JAXJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Check if the pods and services are created") - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerSvc) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - By("Updating Pod's condition with running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, 
testutil.Interval).Should(Succeed()) - - By("Checking if the JAXJob has resumed conditions") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("JAXJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobResumedReason), - Message: fmt.Sprintf("JAXJob %s is resumed.", name), - Status: corev1.ConditionFalse, - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.JAXJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("JAXJob %s/%s is running.", ns.Name, name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Checking if the startTime is updated") - Expect(created.Status.StartTime).ShouldNot(Equal(startTimeBeforeSuspended)) - }) - }) -}) diff --git a/pkg/controller.v1/mpi/mpijob.go b/pkg/controller.v1/mpi/mpijob.go deleted file mode 100644 index accb6788ac..0000000000 --- a/pkg/controller.v1/mpi/mpijob.go +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright 2019 The Kubeflow Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mpi - -import ( - "strings" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" -) - -const ( - configSuffix = "-config" - configVolumeName = "mpi-job-config" - configMountPath = "/etc/mpi" - kubexecScriptName = "kubexec.sh" - hostfileName = "hostfile" - discoverHostsScriptName = "discover_hosts.sh" - kubectlDeliveryName = "kubectl-delivery" - kubectlTargetDirEnv = "TARGET_DIR" - kubectlVolumeName = "mpi-job-kubectl" - kubectlMountPath = "/opt/kube" - launcher = "launcher" - worker = "worker" - launcherSuffix = "-launcher" - workerSuffix = "-worker" - gpuResourceNameSuffix = ".com/gpu" - gpuResourceNamePattern = "gpu" - initContainerCpu = "100m" - initContainerEphStorage = "5Gi" - initContainerMem = "512Mi" - iMPIDefaultBootstrap = "rsh" -) - -const ( - // ErrResourceExists is used as part of the Event 'reason' when an MPIJob - // fails to sync due to dependent resources of the same name already - // existing. - ErrResourceExists = "ErrResourceExists" - - // MessageResourceExists is the message used for Events when a resource - // fails to sync due to dependent resources already existing. - MessageResourceExists = "Resource %q of MPIJobKind %q already exists and is not managed by MPIJob" - - // ErrResourceDoesNotExist is used as part of the Event 'reason' when some - // resource is missing in yaml - ErrResourceDoesNotExist = "ErrResourceDoesNotExist" - - // MessageResourceDoesNotExist is used for Events when some - // resource is missing in yaml - MessageResourceDoesNotExist = "Resource %q is missing in yaml" - - // podTemplateRestartPolicyReason is the warning reason when the restart - // policy is set in pod template. 
- podTemplateRestartPolicyReason = "SettedPodTemplateRestartPolicy" - - // podTemplateSchedulerNameReason is the warning reason when other scheduler name is set - // in pod templates with gang-scheduling enabled - podTemplateSchedulerNameReason = "SettedPodTemplateSchedulerName" - - // mpiJobEvict - mpiJobEvict = "MPIJobEvicted" -) - -// initializeMPIJobStatuses initializes the ReplicaStatuses for MPIJob. -func initializeMPIJobStatuses(mpiJob *kubeflowv1.MPIJob, rType kubeflowv1.ReplicaType) { - if mpiJob.Status.ReplicaStatuses == nil { - mpiJob.Status.ReplicaStatuses = make(map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus) - } - - mpiJob.Status.ReplicaStatuses[rType] = &kubeflowv1.ReplicaStatus{} -} - -// updateMPIJobConditions updates the conditions of the given mpiJob. -func updateMPIJobConditions(mpiJob *kubeflowv1.MPIJob, conditionType kubeflowv1.JobConditionType, reason, message string) error { - condition := newCondition(conditionType, reason, message) - setCondition(&mpiJob.Status, condition) - return nil -} - -// newCondition creates a new mpiJob condition. -func newCondition(conditionType kubeflowv1.JobConditionType, reason, message string) kubeflowv1.JobCondition { - return kubeflowv1.JobCondition{ - Type: conditionType, - Status: corev1.ConditionTrue, - LastUpdateTime: metav1.Now(), - LastTransitionTime: metav1.Now(), - Reason: reason, - Message: message, - } -} - -// getCondition returns the condition with the provided type. 
-func getCondition(status kubeflowv1.JobStatus, condType kubeflowv1.JobConditionType) *kubeflowv1.JobCondition { - for _, condition := range status.Conditions { - if condition.Type == condType { - return &condition - } - } - return nil -} - -func isEvicted(status kubeflowv1.JobStatus) bool { - for _, condition := range status.Conditions { - if condition.Type == kubeflowv1.JobFailed && - condition.Status == corev1.ConditionTrue && - condition.Reason == mpiJobEvict { - return true - } - } - return false -} - -// setCondition updates the mpiJob to include the provided condition. -// If the condition that we are about to add already exists -// and has the same status and reason then we are not going to update. -func setCondition(status *kubeflowv1.JobStatus, condition kubeflowv1.JobCondition) { - - currentCond := getCondition(*status, condition.Type) - - // Do nothing if condition doesn't change - if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason { - return - } - - // Do not update lastTransitionTime if the status of the condition doesn't change. - if currentCond != nil && currentCond.Status == condition.Status { - condition.LastTransitionTime = currentCond.LastTransitionTime - } - - // Append the updated condition - newConditions := filterOutCondition(status.Conditions, condition.Type) - status.Conditions = append(newConditions, condition) -} - -// filterOutCondition returns a new slice of mpiJob conditions without conditions with the provided type. 
-func filterOutCondition(conditions []kubeflowv1.JobCondition, condType kubeflowv1.JobConditionType) []kubeflowv1.JobCondition { - var newConditions []kubeflowv1.JobCondition - for _, c := range conditions { - if condType == kubeflowv1.JobRestarting && c.Type == kubeflowv1.JobRunning { - continue - } - if condType == kubeflowv1.JobRunning && c.Type == kubeflowv1.JobRestarting { - continue - } - - if c.Type == condType { - continue - } - - // Set the running condition status to be false when current condition failed or succeeded - if (condType == kubeflowv1.JobFailed || condType == kubeflowv1.JobSucceeded) && (c.Type == kubeflowv1.JobRunning || c.Type == kubeflowv1.JobFailed) { - c.Status = corev1.ConditionFalse - } - - newConditions = append(newConditions, c) - } - return newConditions -} - -func isPodFinished(j *corev1.Pod) bool { - return isPodSucceeded(j) || isPodFailed(j) -} - -func isPodFailed(p *corev1.Pod) bool { - return p.Status.Phase == corev1.PodFailed -} - -func isPodSucceeded(p *corev1.Pod) bool { - return p.Status.Phase == corev1.PodSucceeded -} - -func isPodRunning(p *corev1.Pod) bool { - return p.Status.Phase == corev1.PodRunning -} - -// isGPULauncher checks whether the launcher needs GPU. -func isGPULauncher(mpiJob *kubeflowv1.MPIJob) bool { - for _, container := range mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.Containers { - for key := range container.Resources.Limits { - if strings.HasSuffix(string(key), gpuResourceNameSuffix) { - return true - } - if strings.Contains(string(key), gpuResourceNamePattern) { - return true - } - } - } - return false -} - -// hasIntelMPIBootstrapValues returns the existence of I_MPI_HYDRA_BOOTSTRAP -// and I_MPI_HYDRA_BOOTSTRAP_EXEC values. -// There are also _EXEC_EXTRA_ARGS and _AUTOFORK under the I_MPI_HYDRA_BOOTSTRAP -// prefix but those are not checked on purpose. 
-func hasIntelMPIBootstrapValues(envs []corev1.EnvVar) (bootstrap, exec bool) { - for _, env := range envs { - if env.Name == "I_MPI_HYDRA_BOOTSTRAP" { - bootstrap = true - } else if env.Name == "I_MPI_HYDRA_BOOTSTRAP_EXEC" { - exec = true - } - - if bootstrap && exec { - break - } - } - - return bootstrap, exec -} - -func defaultReplicaLabels(genericLabels map[string]string, roleLabelVal string) map[string]string { - replicaLabels := map[string]string{} - for k, v := range genericLabels { - replicaLabels[k] = v - } - - replicaLabels[kubeflowv1.ReplicaTypeLabel] = roleLabelVal - return replicaLabels -} - -func defaultWorkerLabels(genericLabels map[string]string) map[string]string { - return defaultReplicaLabels(genericLabels, worker) -} - -func defaultLauncherLabels(genericLabels map[string]string) map[string]string { - return defaultReplicaLabels(genericLabels, launcher) -} - -func workerSelector(genericLabels map[string]string) (labels.Selector, error) { - labels := defaultWorkerLabels(genericLabels) - - labelSelector := metav1.LabelSelector{ - MatchLabels: labels, - } - - selector, err := metav1.LabelSelectorAsSelector(&labelSelector) - if err != nil { - return nil, err - } - - return selector, nil -} - -// initializeReplicaStatuses initializes the ReplicaStatuses for replica. 
-// originally from pkg/controller.v1/tensorflow/status.go (deleted) -func initializeReplicaStatuses(jobStatus *kubeflowv1.JobStatus, rtype kubeflowv1.ReplicaType) { - if jobStatus.ReplicaStatuses == nil { - jobStatus.ReplicaStatuses = make(map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus) - } - - jobStatus.ReplicaStatuses[rtype] = &kubeflowv1.ReplicaStatus{} -} diff --git a/pkg/controller.v1/mpi/mpijob_controller.go b/pkg/controller.v1/mpi/mpijob_controller.go deleted file mode 100644 index e85b9d6ce3..0000000000 --- a/pkg/controller.v1/mpi/mpijob_controller.go +++ /dev/null @@ -1,1400 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mpi - -import ( - "bytes" - "context" - "errors" - "fmt" - "reflect" - "sort" - "strconv" - "strings" - "time" - - "github.com/go-logr/logr" - "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - rbacv1 "k8s.io/api/rbac/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/informers" - kubeclientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/record" - "k8s.io/klog/v2" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/source" - schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" - "github.com/kubeflow/training-operator/pkg/common/util" - ctlrconfig "github.com/kubeflow/training-operator/pkg/config" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - commonutil "github.com/kubeflow/training-operator/pkg/util" -) - -const ( - FailedDeleteJobReason = "FailedDeleteJob" - SuccessfulDeleteJobReason = "SuccessfulDeleteJob" - - controllerName = "mpijob-controller" - labelMPIJobName = "mpi-job-name" -) - -func NewReconciler(mgr manager.Manager, gangSchedulingSetupFunc 
common.GangSchedulingSetupFunc) *MPIJobReconciler { - r := &MPIJobReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - recorder: mgr.GetEventRecorderFor(controllerName), - apiReader: mgr.GetAPIReader(), - Log: log.Log, - } - - cfg := mgr.GetConfig() - kubeClientSet := kubeclientset.NewForConfigOrDie(cfg) - sharedInformers := informers.NewSharedInformerFactory(kubeClientSet, 0) - priorityClassInformer := sharedInformers.Scheduling().V1().PriorityClasses() - - r.JobController = common.JobController{ - Controller: r, - Expectations: expectation.NewControllerExpectations(), - WorkQueue: &util.FakeWorkQueue[string]{}, - Recorder: r.recorder, - KubeClientSet: kubeClientSet, - PriorityClassLister: priorityClassInformer.Lister(), - PriorityClassInformerSynced: priorityClassInformer.Informer().HasSynced, - PodControl: control.RealPodControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - ServiceControl: control.RealServiceControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - } - - gangSchedulingSetupFunc(&r.JobController) - - return r -} - -// MPIJobReconciler reconciles a MPIJob object -type MPIJobReconciler struct { - common.JobController - client.Client - Scheme *runtime.Scheme - recorder record.EventRecorder - apiReader client.Reader - Log logr.Logger -} - -// +kubebuilder:rbac:groups=kubeflow.org,resources=mpijobs,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=kubeflow.org,resources=mpijobs/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=kubeflow.org,resources=mpijobs/finalizers,verbs=update -// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;watch;create -// +kubebuilder:rbac:groups="",resources=configmaps,verbs=list;watch;create;update -// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles,verbs=list;watch;create;update -// 
+kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=rolebindings,verbs=list;watch;create;update -// +kubebuilder:rbac:groups="",resources=pods/exec,verbs=create -// +kubebuilder:rbac:groups=scheduling.volcano.sh,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=scheduling.x-k8s.io,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch;delete - -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -func (jc *MPIJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - logger := jc.Log.WithValues(kubeflowv1.MPIJobSingular, req.NamespacedName) - - mpijob := &kubeflowv1.MPIJob{} - err := jc.Get(ctx, req.NamespacedName, mpijob) - if err != nil { - logger.Info(err.Error(), "unable to fetch MPIJob", req.NamespacedName.String()) - return ctrl.Result{}, client.IgnoreNotFound(err) - } - - if manager := jc.ManagedByExternalController(mpijob.Spec.RunPolicy.ManagedBy); manager != nil { - logger.Info("Skipping MPIJob managed by a custom controller", "managed-by", manager) - return ctrl.Result{}, nil - } - - if err = kubeflowv1.ValidateV1MpiJobSpec(&mpijob.Spec); err != nil { - logger.Error(err, "MPIJob failed validation") - jc.Recorder.Eventf(mpijob, corev1.EventTypeWarning, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobFailedValidationReason), - "MPIJob failed validation because %s", err) - return ctrl.Result{}, err - } - - // skip for MPIJob that is being deleted - if mpijob.GetDeletionTimestamp() != nil { - return ctrl.Result{}, nil - } - - // Set default priorities to MPIJob - jc.Scheme.Default(mpijob) - - // 1) validation rules out CleanPolicy with contradicting value - // 2) if both fields leave empty, Default function fills with None - // 3) if only 
one field set, sync value - cleanPolicyDefined := mpijob.Spec.CleanPodPolicy - if mpijob.Spec.RunPolicy.CleanPodPolicy != nil { - cleanPolicyDefined = mpijob.Spec.RunPolicy.CleanPodPolicy - } - mpijob.Spec.CleanPodPolicy = cleanPolicyDefined - mpijob.Spec.RunPolicy.CleanPodPolicy = cleanPolicyDefined - - // Use common to reconcile the job related pod and service - // MPIJob needs not service - err = jc.ReconcileJobs(mpijob, mpijob.Spec.MPIReplicaSpecs, mpijob.Status, &mpijob.Spec.RunPolicy) - if err != nil { - logrus.Warnf("Reconcile MPIJob error %v", err) - return ctrl.Result{}, err - } - - t, err := util.DurationUntilExpireTime(&mpijob.Spec.RunPolicy, mpijob.Status) - if err != nil { - logrus.Warnf("Reconcile MPIJob Job error %v", err) - return ctrl.Result{}, err - } - if t >= 0 { - return ctrl.Result{Requeue: true, RequeueAfter: t}, nil - } - - return ctrl.Result{}, nil -} - -// SetupWithManager sets up the controller with the Manager. -func (jc *MPIJobReconciler) SetupWithManager(mgr ctrl.Manager, controllerThreads int) error { - c, err := controller.New(jc.ControllerName(), mgr, controller.Options{ - Reconciler: jc, - MaxConcurrentReconciles: controllerThreads, - }) - if err != nil { - return err - } - // using onOwnerCreateFunc is easier to set defaults - if err = c.Watch(source.Kind[*kubeflowv1.MPIJob](mgr.GetCache(), &kubeflowv1.MPIJob{}, - &handler.TypedEnqueueRequestForObject[*kubeflowv1.MPIJob]{}, - predicate.TypedFuncs[*kubeflowv1.MPIJob]{CreateFunc: jc.onOwnerCreateFunc()}), - ); err != nil { - return err - } - // inject watching for job related pod - if err = c.Watch(source.Kind[*corev1.Pod](mgr.GetCache(), &corev1.Pod{}, - handler.TypedEnqueueRequestForOwner[*corev1.Pod](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.MPIJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Pod](jc.Scheme, jc.Expectations, &jc.JobController))); err != nil { - return err - } - // inject watching for job related ConfigMap - if err = 
c.Watch(source.Kind[*corev1.ConfigMap](mgr.GetCache(), &corev1.ConfigMap{}, - handler.TypedEnqueueRequestForOwner[*corev1.ConfigMap](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.MPIJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.ConfigMap](jc.Scheme, jc.Expectations, &jc.JobController))); err != nil { - return err - } - // inject watching for job related Role - if err = c.Watch(source.Kind[*rbacv1.Role](mgr.GetCache(), &rbacv1.Role{}, - handler.TypedEnqueueRequestForOwner[*rbacv1.Role](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.MPIJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*rbacv1.Role](jc.Scheme, jc.Expectations, &jc.JobController))); err != nil { - return err - } - // inject watching for job related RoleBinding - if err = c.Watch(source.Kind[*rbacv1.RoleBinding](mgr.GetCache(), &rbacv1.RoleBinding{}, - handler.TypedEnqueueRequestForOwner[*rbacv1.RoleBinding](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.MPIJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*rbacv1.RoleBinding](jc.Scheme, jc.Expectations, &jc.JobController))); err != nil { - return err - } - // inject watching for job related ServiceAccount - if err = c.Watch(source.Kind[*corev1.ServiceAccount](mgr.GetCache(), &corev1.ServiceAccount{}, - handler.TypedEnqueueRequestForOwner[*corev1.ServiceAccount](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.MPIJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.ServiceAccount](jc.Scheme, jc.Expectations, &jc.JobController))); err != nil { - return err - } - // skip watching volcano PodGroup if volcano PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: v1beta1.GroupName, Kind: "PodGroup"}, - v1beta1.SchemeGroupVersion.Version, - ); err == nil { - // inject watching for job related volcano PodGroup - if err = c.Watch(source.Kind[*v1beta1.PodGroup](mgr.GetCache(), &v1beta1.PodGroup{}, - 
handler.TypedEnqueueRequestForOwner[*v1beta1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.MPIJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*v1beta1.PodGroup](jc.Scheme, jc.Expectations, &jc.JobController))); err != nil { - return err - } - } - // skip watching scheduler-plugins PodGroup if scheduler-plugins PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping( - schema.GroupKind{Group: schedulerpluginsv1alpha1.SchemeGroupVersion.Group, Kind: "PodGroup"}, - schedulerpluginsv1alpha1.SchemeGroupVersion.Version, - ); err == nil { - // inject watching for job related scheduler-plugins PodGroup - if err = c.Watch(source.Kind[*schedulerpluginsv1alpha1.PodGroup](mgr.GetCache(), &schedulerpluginsv1alpha1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*schedulerpluginsv1alpha1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.MPIJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*schedulerpluginsv1alpha1.PodGroup](jc.Scheme, jc.Expectations, &jc.JobController))); err != nil { - return err - } - } - - return nil -} - -// ReconcileServices is overridden because mpi-reconciler.v1 does not need to reconcile services -func (jc *MPIJobReconciler) ReconcileServices( - job metav1.Object, - services []*corev1.Service, - rtype kubeflowv1.ReplicaType, - spec *kubeflowv1.ReplicaSpec) error { - return nil -} - -func (jc *MPIJobReconciler) ControllerName() string { - return controllerName -} - -func (jc *MPIJobReconciler) GetAPIGroupVersionKind() schema.GroupVersionKind { - return kubeflowv1.GroupVersion.WithKind(kubeflowv1.MPIJobKind) -} - -func (jc *MPIJobReconciler) GetAPIGroupVersion() schema.GroupVersion { - return kubeflowv1.GroupVersion -} - -func (jc *MPIJobReconciler) GetGroupNameLabelValue() string { - return kubeflowv1.GroupVersion.Group -} - -func (jc *MPIJobReconciler) GetFrameworkName() string { - return kubeflowv1.MPIJobFrameworkName -} - -// SetClusterSpec is overridden because no cluster spec is 
needed for MPIJob -func (jc *MPIJobReconciler) SetClusterSpec(job interface{}, podTemplate *corev1.PodTemplateSpec, rtype, index string) error { - return nil -} - -func (jc *MPIJobReconciler) GetDefaultContainerName() string { - return kubeflowv1.MPIJobDefaultContainerName -} - -func (jc *MPIJobReconciler) GetDefaultContainerPortName() string { - return kubeflowv1.MPIJobDefaultPortName -} - -func (jc *MPIJobReconciler) IsMasterRole(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, - rtype kubeflowv1.ReplicaType, index int) bool { - return string(rtype) == string(kubeflowv1.MPIJobReplicaTypeLauncher) -} - -func (jc *MPIJobReconciler) GetJobFromInformerCache(namespace, name string) (metav1.Object, error) { - mpijob := &kubeflowv1.MPIJob{} - err := jc.Get(context.Background(), types.NamespacedName{ - Namespace: namespace, Name: name, - }, mpijob) - return mpijob, err -} - -// onOwnerCreateFunc modify creation condition. -func (jc *MPIJobReconciler) onOwnerCreateFunc() func(createEvent event.TypedCreateEvent[*kubeflowv1.MPIJob]) bool { - return func(e event.TypedCreateEvent[*kubeflowv1.MPIJob]) bool { - mpiJob := e.Object - jc.Scheme.Default(mpiJob) - msg := fmt.Sprintf("MPIJob %s is created.", e.Object.GetName()) - logrus.Info(msg) - trainingoperatorcommon.CreatedJobsCounterInc(mpiJob.Namespace, jc.GetFrameworkName()) - commonutil.UpdateJobConditions(&mpiJob.Status, kubeflowv1.JobCreated, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobCreatedReason), msg) - return true - } -} - -func (jc *MPIJobReconciler) ReconcilePods( - job interface{}, - jobStatus *kubeflowv1.JobStatus, - pods []*corev1.Pod, - rtype kubeflowv1.ReplicaType, - spec *kubeflowv1.ReplicaSpec, - replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, -) error { - - mpiJob, ok := job.(*kubeflowv1.MPIJob) - if !ok { - return fmt.Errorf("%v is not a type of MPIJob", mpiJob) - } - - // first set StartTime. 
- if jobStatus.StartTime == nil { - now := metav1.Now() - jobStatus.StartTime = &now - } - - initializeReplicaStatuses(jobStatus, rtype) - - // Get the launcher Job for this MPIJob. - launcher, err := jc.getLauncherJob(mpiJob) - if err != nil { - return err - } - - var worker []*corev1.Pod - // We're done if the launcher either succeeded or failed. - done := launcher != nil && isPodFinished(launcher) - - if !done { - workerSpec := mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeWorker] - workerReplicas := int32(0) - if workerSpec != nil && workerSpec.Replicas != nil { - workerReplicas = *workerSpec.Replicas - } - isGPULauncher := isGPULauncher(mpiJob) - - // Get the launcher ServiceAccount for this MPIJob. - if sa, err := jc.getOrCreateLauncherServiceAccount(mpiJob); sa == nil || err != nil { - return err - } - - // Get the ConfigMap for this MPIJob. - if config, err := jc.getOrCreateConfigMap(mpiJob, workerReplicas, isGPULauncher); config == nil || err != nil { - return err - } - - // Get the launcher Role for this MPIJob. - if r, err := jc.getOrCreateLauncherRole(mpiJob, workerReplicas); r == nil || err != nil { - return err - } - - // Get the launcher RoleBinding for this MPIJob. 
- if rb, err := jc.getLauncherRoleBinding(mpiJob); rb == nil || err != nil { - return err - } - - worker, err = jc.getOrCreateWorker(mpiJob) - if err != nil { - return err - } - - if launcher == nil { - launcher, err = jc.KubeClientSet.CoreV1().Pods(mpiJob.Namespace).Create(context.Background(), jc.newLauncher(mpiJob, ctlrconfig.Config.MPIKubectlDeliveryImage, isGPULauncher), metav1.CreateOptions{}) - if err != nil { - jc.Recorder.Eventf(mpiJob, corev1.EventTypeWarning, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobFailedReason), "launcher pod created failed: %v", err) - return err - } else { - jc.Recorder.Eventf(mpiJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobRunningReason), "launcher pod created success: %v", launcher.Name) - } - } - } - - // Finally, we update the status block of the MPIJob resource to reflect the - // current state of the world. - err = jc.updateMPIJobStatus(mpiJob, launcher, worker) - if err != nil { - return err - } - return nil -} - -func (jc *MPIJobReconciler) updateMPIJobStatus(mpiJob *kubeflowv1.MPIJob, launcher *corev1.Pod, worker []*corev1.Pod) error { - if launcher != nil { - initializeMPIJobStatuses(mpiJob, kubeflowv1.MPIJobReplicaTypeLauncher) - if isPodSucceeded(launcher) { - mpiJob.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeLauncher].Succeeded = 1 - msg := fmt.Sprintf("MPIJob %s/%s successfully completed.", mpiJob.Namespace, mpiJob.Name) - jc.Recorder.Event(mpiJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.MPIJobPlural, commonutil.JobSucceededReason), msg) - if mpiJob.Status.CompletionTime == nil { - now := metav1.Now() - mpiJob.Status.CompletionTime = &now - } - err := updateMPIJobConditions(mpiJob, kubeflowv1.JobSucceeded, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobSucceededReason), msg) - if err != nil { - return err - } - } else if isPodFailed(launcher) { - mpiJob.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeLauncher].Failed = 1 - 
msg := fmt.Sprintf("MPIJob %s/%s has failed", mpiJob.Namespace, mpiJob.Name) - reason := launcher.Status.Reason - if reason == "" { - reason = commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobFailedReason) - } - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, reason, msg) - if reason == "Evicted" { - reason = mpiJobEvict - } else if !isEvicted(mpiJob.Status) && mpiJob.Status.CompletionTime == nil { - now := metav1.Now() - mpiJob.Status.CompletionTime = &now - } - err := updateMPIJobConditions(mpiJob, kubeflowv1.JobFailed, reason, msg) - if err != nil { - klog.Errorf("Append mpiJob(%s/%s) condition error: %v", mpiJob.Namespace, mpiJob.Name, err) - return err - } - - } else if isPodRunning(launcher) { - mpiJob.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeLauncher].Active = 1 - } - } - - var ( - running = 0 - evict = 0 - ) - - initializeMPIJobStatuses(mpiJob, kubeflowv1.MPIJobReplicaTypeWorker) - for i := 0; i < len(worker); i++ { - switch worker[i].Status.Phase { - case corev1.PodFailed: - mpiJob.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeWorker].Failed += 1 - if worker[i].Status.Reason == "Evicted" { - evict += 1 - } - case corev1.PodSucceeded: - mpiJob.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeWorker].Succeeded += 1 - case corev1.PodRunning: - running += 1 - mpiJob.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeWorker].Active += 1 - } - } - if evict > 0 { - msg := fmt.Sprintf("%d/%d workers are evicted", evict, len(worker)) - if err := updateMPIJobConditions(mpiJob, kubeflowv1.JobFailed, mpiJobEvict, msg); err != nil { - return err - } - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, mpiJobEvict, msg) - } - - if launcher != nil && launcher.Status.Phase == corev1.PodRunning && running == len(worker) { - msg := fmt.Sprintf("MPIJob %s/%s is running.", mpiJob.Namespace, mpiJob.Name) - err := updateMPIJobConditions(mpiJob, kubeflowv1.JobRunning, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobRunningReason), msg) - if 
err != nil { - return err - } - jc.Recorder.Eventf(mpiJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobRunningReason), "MPIJob %s/%s is running", mpiJob.Namespace, mpiJob.Name) - } - return nil -} - -func (jc *MPIJobReconciler) GetJobFromAPIClient(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.MPIJob{} - - err := jc.apiReader.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if apierrors.IsNotFound(err) { - logrus.Error(err, "MPIJob not found", "namespace", namespace, "name", name) - } else { - logrus.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -// GetPodsForJob returns the set of pods that this job should manage. -// It also reconciles ControllerRef by adopting/orphaning. -// Note that the returned Pods are pointers into the cache. -func (jc *MPIJobReconciler) GetPodsForJob(jobObject interface{}) ([]*corev1.Pod, error) { - job, ok := jobObject.(metav1.Object) - if !ok { - return nil, fmt.Errorf("job is not of type metav1.Object") - } - - // Create selector. - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: jc.GenLabels(job.GetName()), - }) - - if err != nil { - return nil, fmt.Errorf("couldn't convert Job selector: %v", err) - } - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. 
- podlist := &corev1.PodList{} - err = jc.List(context.Background(), podlist, - client.MatchingLabelsSelector{Selector: selector}, client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - return util.JobControlledPodList(podlist.Items, job), nil -} - -func (jc *MPIJobReconciler) DeleteJob(job interface{}) error { - mpiJob, ok := job.(*kubeflowv1.MPIJob) - if !ok { - return fmt.Errorf("%v is not a type of MPIJob", mpiJob) - } - - log := commonutil.LoggerForJob(mpiJob) - if err := jc.Delete(context.Background(), mpiJob); err != nil { - jc.Recorder.Eventf(mpiJob, corev1.EventTypeWarning, FailedDeleteJobReason, "Error deleting: %v", err) - log.Errorf("failed to delete job %s/%s, %v", mpiJob.Namespace, mpiJob.Name, err) - return err - } - - jc.Recorder.Eventf(mpiJob, corev1.EventTypeNormal, SuccessfulDeleteJobReason, "Deleted job: %v", mpiJob.Name) - log.Infof("job %s/%s has been deleted", mpiJob.Namespace, mpiJob.Name) - trainingoperatorcommon.DeletedJobsCounterInc(mpiJob.Namespace, jc.GetFrameworkName()) - return nil -} - -// GetServicesForJob returns the set of services that this job should manage. -// It also reconciles ControllerRef by adopting/orphaning. -// Note that the returned services are pointers into the cache. 
-func (jc *MPIJobReconciler) GetServicesForJob(jobObject interface{}) ([]*corev1.Service, error) { - return nil, nil -} - -func (jc *MPIJobReconciler) UpdateJobStatus(job interface{}, replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, jobStatus *kubeflowv1.JobStatus) error { - mpiJob, ok := job.(*kubeflowv1.MPIJob) - if !ok { - return fmt.Errorf("%+v is not a type of MPIJob", job) - } - - for rtype, spec := range replicas { - status := jobStatus.ReplicaStatuses[rtype] - - succeeded := status.Succeeded - expected := *(spec.Replicas) - succeeded - running := status.Active - failed := status.Failed - - logrus.Infof("MPIJob=%s, ReplicaType=%s expected=%d, running=%d, succeeded=%d , failed=%d", - mpiJob.Name, rtype, expected, running, succeeded, failed) - - if rtype == kubeflowv1.MPIJobReplicaTypeLauncher { - if running > 0 { - msg := fmt.Sprintf("MPIJob %s is running.", mpiJob.Name) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobRunningReason), msg) - } - // when launcher is succeed, the job is finished. 
- if expected == 0 { - msg := fmt.Sprintf("MPIJob %s is successfully completed.", mpiJob.Name) - logrus.Info(msg) - jc.Recorder.Event(mpiJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobSucceededReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobSucceededReason), msg) - trainingoperatorcommon.SuccessfulJobsCounterInc(mpiJob.Namespace, jc.GetFrameworkName()) - return nil - } - } - if failed > 0 { - if spec.RestartPolicy == kubeflowv1.RestartPolicyExitCode { - msg := fmt.Sprintf("MPIJob %s is restarting because %d %s replica(s) failed.", mpiJob.Name, failed, rtype) - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobRestartingReason), msg) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRestarting, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobRestartingReason), msg) - trainingoperatorcommon.RestartedJobsCounterInc(mpiJob.Namespace, jc.GetFrameworkName()) - } else { - msg := fmt.Sprintf("MPIJob %s is failed because %d %s replica(s) failed.", mpiJob.Name, failed, rtype) - jc.Recorder.Event(mpiJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobFailedReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobFailed, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobFailedReason)), msg) - trainingoperatorcommon.FailedJobsCounterInc(mpiJob.Namespace, jc.GetFrameworkName()) - } - } - } - mpiJob.Status = *jobStatus.DeepCopy() - return nil -} - -func (jc *MPIJobReconciler) UpdateJobStatusInApiServer(job 
interface{}, jobStatus *kubeflowv1.JobStatus) error { - if jobStatus.ReplicaStatuses == nil { - jobStatus.ReplicaStatuses = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus{} - } - - mpiJob, ok := job.(*kubeflowv1.MPIJob) - trainingoperatorcommon.ClearGeneratedFields(&mpiJob.ObjectMeta) - if !ok { - return fmt.Errorf("%v is not a type of MpiJob", mpiJob) - } - - startTime := time.Now() - logger := commonutil.LoggerForJob(mpiJob) - defer func() { - logger.Infof("Finished updating MpiJobs Status %q (%v)", - mpiJob.Name, time.Since(startTime)) - }() - - mpiJob = mpiJob.DeepCopy() - mpiJob.Status = *jobStatus.DeepCopy() - - result := jc.Status().Update(context.Background(), mpiJob) - - if result != nil { - jc.Log.WithValues("mpijob", types.NamespacedName{ - Namespace: mpiJob.GetNamespace(), - Name: mpiJob.GetName(), - }) - return result - } - - return nil -} - -// getLauncherJob gets the launcher Job controlled by this MPIJob. -func (jc *MPIJobReconciler) getLauncherJob(mpiJob *kubeflowv1.MPIJob) (*corev1.Pod, error) { - launcher := &corev1.Pod{} - NamespacedName := types.NamespacedName{Namespace: mpiJob.Namespace, Name: mpiJob.Name + launcherSuffix} - err := jc.Get(context.Background(), NamespacedName, launcher) - if apierrors.IsNotFound(err) { - return nil, nil - } - if err != nil { - // If an error occurs during Get, we'll requeue the item so we can - // attempt processing again later. This could have been caused by a - // temporary network failure, or any other transient reason. - return nil, err - } - - // If the launcher is not controlled by this MPIJob resource, we should log - // a warning to the event recorder and return. 
- if !metav1.IsControlledBy(launcher, mpiJob) { - msg := fmt.Sprintf(MessageResourceExists, launcher.Name, launcher.Kind) - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg) - return launcher, errors.New(msg) - } - return launcher, nil -} - -// getOrCreateConfigMap gets the ConfigMap controlled by this MPIJob, or creates -// one if it doesn't exist. -func (jc *MPIJobReconciler) getOrCreateConfigMap(mpiJob *kubeflowv1.MPIJob, workerReplicas int32, isGPULauncher bool) (*corev1.ConfigMap, error) { - newCM := newConfigMap(mpiJob, workerReplicas, isGPULauncher) - podList, err := jc.getRunningWorkerPods(mpiJob) - if err != nil { - return nil, err - } - updateDiscoverHostsInConfigMap(newCM, mpiJob, podList, isGPULauncher) - - cm := &corev1.ConfigMap{} - NamespacedName := types.NamespacedName{Namespace: mpiJob.Namespace, Name: mpiJob.Name + configSuffix} - err = jc.Get(context.Background(), NamespacedName, cm) - - // If the ConfigMap doesn't exist, we'll create it. - if apierrors.IsNotFound(err) { - cm, err = jc.KubeClientSet.CoreV1().ConfigMaps(mpiJob.Namespace).Create(context.Background(), newCM, metav1.CreateOptions{}) - } - // If an error occurs during Get/Create, we'll requeue the item so we - // can attempt processing again later. This could have been caused by a - // temporary network failure, or any other transient reason. - if err != nil { - return nil, err - } - - // If the ConfigMap is not controlled by this MPIJob resource, we - // should log a warning to the event recorder and return. 
- if !metav1.IsControlledBy(cm, mpiJob) { - msg := fmt.Sprintf(MessageResourceExists, cm.Name, cm.Kind) - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg) - return nil, errors.New(msg) - } - - // If the ConfigMap is changed, update it - if !reflect.DeepEqual(cm.Data, newCM.Data) { - cm, err = jc.KubeClientSet.CoreV1().ConfigMaps(mpiJob.Namespace).Update(context.Background(), newCM, metav1.UpdateOptions{}) - if err != nil { - return nil, err - } - } - - return cm, nil -} - -// getOrCreateLauncherServiceAccount gets the launcher ServiceAccount controlled -// by this MPIJob, or creates one if it doesn't exist. -func (jc *MPIJobReconciler) getOrCreateLauncherServiceAccount(mpiJob *kubeflowv1.MPIJob) (*corev1.ServiceAccount, error) { - saName := mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.ServiceAccountName - - if len(saName) == 0 { - saName = mpiJob.Name + launcherSuffix - } - - sa := &corev1.ServiceAccount{} - NamespacedName := types.NamespacedName{Namespace: mpiJob.Namespace, Name: saName} - err := jc.Get(context.Background(), NamespacedName, sa) - - if err == nil { - jc.Recorder.Eventf(mpiJob, corev1.EventTypeNormal, "ServiceAccount is exist", "ServiceAccount: %v", sa.Name) - } - - if apierrors.IsNotFound(err) { - sa, err = jc.KubeClientSet.CoreV1().ServiceAccounts(mpiJob.Namespace).Create(context.Background(), newLauncherServiceAccount(mpiJob), metav1.CreateOptions{}) - } - // If an error occurs during Get/Create, we'll requeue the item so we - // can attempt processing again later. This could have been caused by a - // temporary network failure, or any other transient reason. - if err != nil { - return nil, err - } - - return sa, nil -} - -// getOrCreateLauncherRole gets the launcher Role controlled by this MPIJob. 
-func (jc *MPIJobReconciler) getOrCreateLauncherRole(mpiJob *kubeflowv1.MPIJob, workerReplicas int32) (*rbacv1.Role, error) { - role := &rbacv1.Role{} - NamespacedName := types.NamespacedName{Namespace: mpiJob.Namespace, Name: mpiJob.Name + launcherSuffix} - err := jc.Get(context.Background(), NamespacedName, role) - - if err == nil { - jc.Recorder.Eventf(mpiJob, corev1.EventTypeNormal, "LauncherRole is exist", "LauncherRole: %v", role.Name) - } - - launcherRole := newLauncherRole(mpiJob, workerReplicas) - // If the Role doesn't exist, we'll create it. - if apierrors.IsNotFound(err) { - role, err = jc.KubeClientSet.RbacV1().Roles(mpiJob.Namespace).Create(context.Background(), launcherRole, metav1.CreateOptions{}) - } - // If an error occurs during Get/Create, we'll requeue the item so we - // can attempt processing again later. This could have been caused by a - // temporary network failure, or any other transient reason. - if err != nil { - return nil, err - } - // If the launcher Role is not controlled by this MPIJob resource, we - // should log a warning to the event recorder and return. - if !metav1.IsControlledBy(role, mpiJob) { - msg := fmt.Sprintf(MessageResourceExists, role.Name, role.Kind) - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg) - return nil, errors.New(msg) - } - - if !reflect.DeepEqual(role.Rules, launcherRole.Rules) { - role, err = jc.KubeClientSet.RbacV1().Roles(mpiJob.Namespace).Update(context.Background(), launcherRole, metav1.UpdateOptions{}) - if err != nil { - return nil, err - } - } - - return role, nil -} - -// getLauncherRoleBinding gets the launcher RoleBinding controlled by this -// MPIJob, or creates one if it doesn't exist. 
-func (jc *MPIJobReconciler) getLauncherRoleBinding(mpiJob *kubeflowv1.MPIJob) (*rbacv1.RoleBinding, error) { - rb := &rbacv1.RoleBinding{} - NamespacedName := types.NamespacedName{Namespace: mpiJob.Namespace, Name: mpiJob.Name + launcherSuffix} - err := jc.Get(context.Background(), NamespacedName, rb) - // If the RoleBinding doesn't exist, we'll create it. - - if err == nil { - jc.Recorder.Eventf(mpiJob, corev1.EventTypeNormal, "RoleBinding is exist", "RoleBinding: %v", rb.Name) - } - - if apierrors.IsNotFound(err) { - rb, err = jc.KubeClientSet.RbacV1().RoleBindings(mpiJob.Namespace).Create(context.Background(), newLauncherRoleBinding(mpiJob), metav1.CreateOptions{}) - } - // If an error occurs during Get/Create, we'll requeue the item so we - // can attempt processing again later. This could have been caused by a - // temporary network failure, or any other transient reason. - if err != nil { - return nil, err - } - // If the launcher RoleBinding is not controlled by this MPIJob resource, we - // should log a warning to the event recorder and return. - if !metav1.IsControlledBy(rb, mpiJob) { - msg := fmt.Sprintf(MessageResourceExists, rb.Name, rb.Kind) - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg) - return nil, errors.New(msg) - } - - return rb, nil -} - -// getOrCreateWorker gets the worker Pod controlled by this -// MPIJob, or creates one if it doesn't exist. 
-func (jc *MPIJobReconciler) getOrCreateWorker(mpiJob *kubeflowv1.MPIJob) ([]*corev1.Pod, error) { - var ( - workerPrefix string = mpiJob.Name + workerSuffix - workerPods []*corev1.Pod = []*corev1.Pod{} - i int32 = 0 - workerReplicas *int32 - ) - if worker, ok := mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeWorker]; ok && worker != nil { - workerReplicas = worker.Replicas - } else { - return workerPods, nil - } - - // Remove Pods when replicas are scaled down - genericLabels := jc.GenLabels(mpiJob.GetName()) - selector, err := workerSelector(genericLabels) - if err != nil { - return nil, err - } - - podlist := &corev1.PodList{} - err = jc.List(context.Background(), podlist, client.MatchingLabelsSelector{Selector: selector}, client.InNamespace(mpiJob.GetNamespace())) - - if err != nil { - return nil, err - } - if len(podlist.Items) > int(*workerReplicas) { - for _, pod := range podlist.Items { - indexStr, ok := pod.Labels[kubeflowv1.ReplicaIndexLabel] - if !ok { - return nil, err - } - index, err := strconv.Atoi(indexStr) - if err == nil { - if index >= int(*workerReplicas) { - err = jc.KubeClientSet.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, metav1.DeleteOptions{}) - if err != nil { - return nil, err - } - } - } - } - } - - for ; i < *workerReplicas; i++ { - name := fmt.Sprintf("%s-%d", workerPrefix, i) - - pod := &corev1.Pod{} - NamespacedName := types.NamespacedName{Namespace: mpiJob.Namespace, Name: name} - err := jc.Get(context.Background(), NamespacedName, pod) - - // If the worker Pod doesn't exist, we'll create it. 
- if apierrors.IsNotFound(err) { - worker := jc.newWorker(mpiJob, name) - if worker == nil { - msg := fmt.Sprintf(MessageResourceDoesNotExist, "Worker") - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceDoesNotExist, msg) - return nil, errors.New(msg) - } - // Insert ReplicaIndexLabel - worker.Labels[kubeflowv1.ReplicaIndexLabel] = strconv.Itoa(int(i)) - pod, err = jc.KubeClientSet.CoreV1().Pods(mpiJob.Namespace).Create(context.Background(), worker, metav1.CreateOptions{}) - if err == nil { - jc.Recorder.Eventf(mpiJob, corev1.EventTypeNormal, "SuccessfulCreatePod", "Created worker pod: %v", pod.Name) - } else { - jc.Recorder.Eventf(mpiJob, corev1.EventTypeWarning, "FailedCreatePod", "Created worker pod: %v", pod.Name) - } - } - - // If an error occurs during Get/Create, we'll requeue the item so we - // can attempt processing again later. This could have been caused by a - // temporary network failure, or any other transient reason. - if err != nil && !apierrors.IsNotFound(err) { - jc.Recorder.Eventf(mpiJob, corev1.EventTypeWarning, commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobFailedReason), - "worker pod created failed: %v", err) - return nil, err - } - // If the worker is not controlled by this MPIJob resource, we should log - // a warning to the event recorder and return. - if pod != nil && !metav1.IsControlledBy(pod, mpiJob) { - msg := fmt.Sprintf(MessageResourceExists, pod.Name, pod.Kind) - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg) - return nil, errors.New(msg) - } - workerPods = append(workerPods, pod) - } - - return workerPods, nil -} - -// newWorker creates a new worker Pod for an MPIJob resource. It also -// sets the appropriate OwnerReferences on the resource so handleObject can -// discover the MPIJob resource that 'owns' it. 
-func (jc *MPIJobReconciler) newWorker(mpiJob *kubeflowv1.MPIJob, name string) *corev1.Pod { - genericLabels := jc.GenLabels(mpiJob.GetName()) - labels := defaultWorkerLabels(genericLabels) - - podSpec := mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeWorker].Template.DeepCopy() - - // keep the labels which are set in PodTemplate - if len(podSpec.Labels) == 0 { - podSpec.Labels = make(map[string]string) - } - - for key, value := range labels { - podSpec.Labels[key] = value - } - setRestartPolicy(podSpec, mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeWorker]) - logger := commonutil.LoggerForReplica(mpiJob, strings.ToLower(string(kubeflowv1.MPIJobReplicaTypeLauncher))) - if len(podSpec.Spec.Containers) == 0 { - klog.Errorln("Worker pod does not have any containers in its spec") - return nil - } - container := podSpec.Spec.Containers[0] - if len(container.Command) == 0 { - container.Command = []string{"sleep"} - container.Args = []string{"365d"} - } - - // We need the kubexec.sh script here because Open MPI checks for the path - // in every rank. - container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ - Name: configVolumeName, - MountPath: configMountPath, - }) - podSpec.Spec.Containers[0] = container - - scriptMode := int32(0555) - podSpec.Spec.Volumes = append(podSpec.Spec.Volumes, corev1.Volume{ - Name: configVolumeName, - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: mpiJob.Name + configSuffix, - }, - Items: []corev1.KeyToPath{ - { - Key: kubexecScriptName, - Path: kubexecScriptName, - Mode: &scriptMode, - }, - }, - }, - }, - }) - - // if gang-scheduling is enabled: - // 1. if user has specified other scheduler, we report a warning without overriding any fields. - // 2. if no SchedulerName is set for pods, then we set the SchedulerName to "volcano". 
- if jc.Config.EnableGangScheduling() { - if !util.IsGangSchedulerSet(mpiJob.Spec.MPIReplicaSpecs, jc.PodGroupControl.GetSchedulerName()) { - errMsg := "Another scheduler is specified when gang-scheduling is enabled and it will not be overwritten" - logger.Warning(errMsg) - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, podTemplateSchedulerNameReason, errMsg) - } - - rtWorker := strings.ToLower(string(kubeflowv1.MPIJobReplicaTypeWorker)) - jc.PodGroupControl.DecoratePodTemplateSpec(podSpec, mpiJob, rtWorker) - } - - return &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: mpiJob.Namespace, - Labels: podSpec.Labels, - Annotations: podSpec.Annotations, - OwnerReferences: []metav1.OwnerReference{ - *metav1.NewControllerRef(mpiJob, kubeflowv1.MPIJobSchemeGroupVersionKind), - }, - }, - Spec: podSpec.Spec, - } -} - -// newLauncher creates a new launcher Job for an MPIJob resource. It also sets -// the appropriate OwnerReferences on the resource so handleObject can discover -// the MPIJob resource that 'owns' it. 
-func (jc *MPIJobReconciler) newLauncher(mpiJob *kubeflowv1.MPIJob, kubectlDeliveryImage string, isGPULauncher bool) *corev1.Pod { - launcherName := mpiJob.Name + launcherSuffix - - genericLabels := jc.GenLabels(mpiJob.GetName()) - labels := defaultLauncherLabels(genericLabels) - - masterRole := jc.IsMasterRole(mpiJob.Spec.MPIReplicaSpecs, kubeflowv1.MPIJobReplicaTypeLauncher, 0) - if masterRole { - labels[kubeflowv1.JobRoleLabel] = "master" - } - podSpec := mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.DeepCopy() - // copy the labels and annotations to pod from PodTemplate - if len(podSpec.Labels) == 0 { - podSpec.Labels = make(map[string]string) - } - for key, value := range labels { - podSpec.Labels[key] = value - } - - logger := commonutil.LoggerForReplica(mpiJob, strings.ToLower(string(kubeflowv1.MPIJobReplicaTypeLauncher))) - // add SchedulerName to podSpec - if jc.Config.EnableGangScheduling() { - if !util.IsGangSchedulerSet(mpiJob.Spec.MPIReplicaSpecs, jc.PodGroupControl.GetSchedulerName()) { - errMsg := "Another scheduler is specified when gang-scheduling is enabled and it will not be overwritten" - logger.Warning(errMsg) - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, podTemplateSchedulerNameReason, errMsg) - } - - rt := strings.ToLower(string(kubeflowv1.MPIJobReplicaTypeLauncher)) - jc.PodGroupControl.DecoratePodTemplateSpec(podSpec, mpiJob, rt) - } - - if len(mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.ServiceAccountName) == 0 { - podSpec.Spec.ServiceAccountName = launcherName - } - - podSpec.Spec.InitContainers = append(podSpec.Spec.InitContainers, corev1.Container{ - Name: kubectlDeliveryName, - Image: kubectlDeliveryImage, - ImagePullPolicy: corev1.PullIfNotPresent, - Env: []corev1.EnvVar{ - { - Name: kubectlTargetDirEnv, - Value: kubectlMountPath, - }, - { - Name: "NAMESPACE", - Value: mpiJob.Namespace, - }, - }, - VolumeMounts: []corev1.VolumeMount{ - { - Name: kubectlVolumeName, 
- MountPath: kubectlMountPath, - }, - { - Name: configVolumeName, - MountPath: configMountPath, - }, - }, - Resources: corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse(initContainerCpu), - corev1.ResourceMemory: resource.MustParse(initContainerMem), - corev1.ResourceEphemeralStorage: resource.MustParse(initContainerEphStorage), - }, - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse(initContainerCpu), - corev1.ResourceMemory: resource.MustParse(initContainerMem), - corev1.ResourceEphemeralStorage: resource.MustParse(initContainerEphStorage), - }, - }, - }) - if len(podSpec.Spec.Containers) == 0 { - klog.Errorln("Launcher pod does not have any containers in its spec") - msg := fmt.Sprintf(MessageResourceDoesNotExist, "Launcher") - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceDoesNotExist, msg) - return nil - } - container := podSpec.Spec.Containers[0] - container.Env = append(container.Env, - corev1.EnvVar{ - Name: "OMPI_MCA_plm_rsh_agent", - Value: fmt.Sprintf("%s/%s", configMountPath, kubexecScriptName), - }, - corev1.EnvVar{ - Name: "OMPI_MCA_orte_default_hostfile", - Value: fmt.Sprintf("%s/%s", configMountPath, hostfileName), - }, - ) - - if !isGPULauncher { - container.Env = append(container.Env, - // We overwrite these environment variables so that users will not - // be mistakenly using GPU resources for launcher due to potential - // issues with scheduler/container technologies. - corev1.EnvVar{ - Name: "NVIDIA_VISIBLE_DEVICES", - Value: "", - }, - corev1.EnvVar{ - Name: "NVIDIA_DRIVER_CAPABILITIES", - Value: "", - }) - } - - // Add default Intel MPI bootstrap variables if not provided by the user. 
- bootstrap, exec := hasIntelMPIBootstrapValues(container.Env) - if !bootstrap { - container.Env = append(container.Env, - corev1.EnvVar{ - Name: "I_MPI_HYDRA_BOOTSTRAP", - Value: iMPIDefaultBootstrap, - }, - ) - } - if !exec { - container.Env = append(container.Env, - corev1.EnvVar{ - Name: "I_MPI_HYDRA_BOOTSTRAP_EXEC", - Value: fmt.Sprintf("%s/%s", configMountPath, kubexecScriptName), - }, - ) - } - - container.VolumeMounts = append(container.VolumeMounts, - corev1.VolumeMount{ - Name: kubectlVolumeName, - MountPath: kubectlMountPath, - }, - corev1.VolumeMount{ - Name: configVolumeName, - MountPath: configMountPath, - }) - podSpec.Spec.Containers[0] = container - - // Submit a warning event if the user specifies restart policy for - // the pod template. We recommend to set it from the replica level. - if podSpec.Spec.RestartPolicy != corev1.RestartPolicy("") { - errMsg := "Restart policy in pod template will be overwritten by restart policy in replica spec" - klog.Warning(errMsg) - jc.Recorder.Event(mpiJob, corev1.EventTypeWarning, podTemplateRestartPolicyReason, errMsg) - } - setRestartPolicy(podSpec, mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher]) - - scriptsMode := int32(0555) - hostfileMode := int32(0444) - podSpec.Spec.Volumes = append(podSpec.Spec.Volumes, - corev1.Volume{ - Name: kubectlVolumeName, - VolumeSource: corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{}, - }, - }, - corev1.Volume{ - Name: configVolumeName, - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: mpiJob.Name + configSuffix, - }, - Items: []corev1.KeyToPath{ - { - Key: kubexecScriptName, - Path: kubexecScriptName, - Mode: &scriptsMode, - }, - { - Key: hostfileName, - Path: hostfileName, - Mode: &hostfileMode, - }, - { - Key: discoverHostsScriptName, - Path: discoverHostsScriptName, - Mode: &scriptsMode, - }, - }, - }, - }, - }) - return &corev1.Pod{ - ObjectMeta: 
metav1.ObjectMeta{ - Name: launcherName, - Namespace: mpiJob.Namespace, - Labels: podSpec.Labels, - Annotations: podSpec.Annotations, - OwnerReferences: []metav1.OwnerReference{ - *metav1.NewControllerRef(mpiJob, kubeflowv1.MPIJobSchemeGroupVersionKind), - }, - }, - Spec: podSpec.Spec, - } -} - -// getRunningWorkerPods get all worker Pods with Running phase controlled by this MPIJob. -func (jc *MPIJobReconciler) getRunningWorkerPods(mpiJob *kubeflowv1.MPIJob) ([]*corev1.Pod, error) { - genericLabels := jc.GenLabels(mpiJob.GetName()) - selector, err := workerSelector(genericLabels) - if err != nil { - return nil, err - } - - podFullList := &corev1.PodList{} - err = jc.List(context.Background(), podFullList, client.MatchingLabelsSelector{Selector: selector}, client.InNamespace(mpiJob.GetNamespace())) - // podFullList, err := r.PodLister.List(selector) - if err != nil { - return nil, err - } - // Only running Pods should be included within the `discover_hosts.sh` script. - var podList []corev1.Pod - for idx, pod := range podFullList.Items { - if pod.Status.Phase == corev1.PodRunning { - podList = append(podList, podFullList.Items[idx]) - } - } - return util.JobControlledPodList(podList, mpiJob), nil -} - -// newConfigMap creates a new ConfigMap containing configurations for an MPIJob -// resource. It also sets the appropriate OwnerReferences on the resource so -// handleObject can discover the MPIJob resource that 'owns' it. -func newConfigMap(mpiJob *kubeflowv1.MPIJob, workerReplicas int32, isGPULauncher bool) *corev1.ConfigMap { - kubexec := fmt.Sprintf(`#!/bin/sh -set -x -POD_NAME=$1 -shift -%s/kubectl exec ${POD_NAME}`, kubectlMountPath) - if len(mpiJob.Spec.MainContainer) > 0 { - kubexec = fmt.Sprintf("%s --container %s", kubexec, mpiJob.Spec.MainContainer) - } - kubexec = fmt.Sprintf("%s -- /bin/sh -c \"$*\"", kubexec) - - // If no processing unit is specified, default to 1 slot. 
- slots := 1 - if mpiJob.Spec.SlotsPerWorker != nil { - slots = int(*mpiJob.Spec.SlotsPerWorker) - } - var buffer bytes.Buffer - if isGPULauncher { - buffer.WriteString(fmt.Sprintf("%s%s slots=%d\n", mpiJob.Name, launcherSuffix, slots)) - } - for i := 0; i < int(workerReplicas); i++ { - buffer.WriteString(fmt.Sprintf("%s%s-%d slots=%d\n", mpiJob.Name, workerSuffix, i, slots)) - } - - return &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: mpiJob.Name + configSuffix, - Namespace: mpiJob.Namespace, - Labels: map[string]string{ - "app": mpiJob.Name, - }, - OwnerReferences: []metav1.OwnerReference{ - *metav1.NewControllerRef(mpiJob, kubeflowv1.MPIJobSchemeGroupVersionKind), - }, - }, - Data: map[string]string{ - hostfileName: buffer.String(), - kubexecScriptName: kubexec, - }, - } -} - -// updateDiscoverHostsInConfigMap updates the ConfigMap if the content of `discover_hosts.sh` changes. -func updateDiscoverHostsInConfigMap(configMap *corev1.ConfigMap, mpiJob *kubeflowv1.MPIJob, runningPods []*corev1.Pod, isGPULauncher bool) { - slots := 1 - if mpiJob.Spec.SlotsPerWorker != nil { - slots = int(*mpiJob.Spec.SlotsPerWorker) - } - - // Sort the slice of Pods to make sure the order of entries in `discover_hosts.sh` is maintained. - sort.Slice(runningPods, func(i, j int) bool { - return runningPods[i].Name < runningPods[j].Name - }) - - discoverHosts := "#!/bin/sh" - if isGPULauncher { - discoverHosts = fmt.Sprintf("%s\necho %s%s:%d\n", discoverHosts, mpiJob.Name, launcherSuffix, slots) - } - for _, p := range runningPods { - discoverHosts = fmt.Sprintf("%s\necho %s:%d", discoverHosts, p.Name, slots) - } - - oldDiscoverHosts, exist := configMap.Data[discoverHostsScriptName] - if exist { - if oldDiscoverHosts == discoverHosts { - return - } - } - configMap.Data[discoverHostsScriptName] = discoverHosts -} - -// newLauncherServiceAccount creates a new launcher ServiceAccount for an MPIJob -// resource. 
It also sets the appropriate OwnerReferences on the resource so -// handleObject can discover the MPIJob resource that 'owns' it. -func newLauncherServiceAccount(mpiJob *kubeflowv1.MPIJob) *corev1.ServiceAccount { - launcherName := mpiJob.Name + launcherSuffix - - if len(mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.ServiceAccountName) > 0 { - launcherName = mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.ServiceAccountName - } - - return &corev1.ServiceAccount{ - ObjectMeta: metav1.ObjectMeta{ - Name: launcherName, - Namespace: mpiJob.Namespace, - Labels: map[string]string{ - "app": mpiJob.Name, - }, - OwnerReferences: []metav1.OwnerReference{ - *metav1.NewControllerRef(mpiJob, kubeflowv1.MPIJobSchemeGroupVersionKind), - }, - }, - } -} - -// newLauncherRole creates a new launcher Role for an MPIJob resource. It also -// sets the appropriate OwnerReferences on the resource so handleObject can -// discover the MPIJob resource that 'owns' it. -func newLauncherRole(mpiJob *kubeflowv1.MPIJob, workerReplicas int32) *rbacv1.Role { - var podNames []string - for i := 0; i < int(workerReplicas); i++ { - podNames = append(podNames, fmt.Sprintf("%s%s-%d", mpiJob.Name, workerSuffix, i)) - } - return &rbacv1.Role{ - ObjectMeta: metav1.ObjectMeta{ - Name: mpiJob.Name + launcherSuffix, - Namespace: mpiJob.Namespace, - Labels: map[string]string{ - "app": mpiJob.Name, - }, - OwnerReferences: []metav1.OwnerReference{ - *metav1.NewControllerRef(mpiJob, kubeflowv1.MPIJobSchemeGroupVersionKind), - }, - }, - Rules: []rbacv1.PolicyRule{ - { - Verbs: []string{"get", "list", "watch"}, - APIGroups: []string{""}, - Resources: []string{"pods"}, - }, - { - Verbs: []string{"create"}, - APIGroups: []string{""}, - Resources: []string{"pods/exec"}, - ResourceNames: podNames, - }, - }, - } -} - -// newLauncherRoleBinding creates a new launcher RoleBinding for an MPIJob -// resource. 
It also sets the appropriate OwnerReferences on the resource so -// handleObject can discover the MPIJob resource that 'owns' it. -func newLauncherRoleBinding(mpiJob *kubeflowv1.MPIJob) *rbacv1.RoleBinding { - launcherName := mpiJob.Name + launcherSuffix - saName := launcherName - - if len(mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.ServiceAccountName) > 0 { - saName = mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.ServiceAccountName - } - - return &rbacv1.RoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: launcherName, - Namespace: mpiJob.Namespace, - Labels: map[string]string{ - "app": mpiJob.Name, - }, - OwnerReferences: []metav1.OwnerReference{ - *metav1.NewControllerRef(mpiJob, kubeflowv1.MPIJobSchemeGroupVersionKind), - }, - }, - Subjects: []rbacv1.Subject{ - { - Kind: rbacv1.ServiceAccountKind, - Name: saName, - Namespace: mpiJob.Namespace, - }, - }, - RoleRef: rbacv1.RoleRef{ - APIGroup: rbacv1.GroupName, - Kind: "Role", - Name: launcherName, - }, - } -} - -func setRestartPolicy(podTemplateSpec *corev1.PodTemplateSpec, spec *kubeflowv1.ReplicaSpec) { - if spec.RestartPolicy == kubeflowv1.RestartPolicyExitCode { - podTemplateSpec.Spec.RestartPolicy = corev1.RestartPolicyNever - } else { - podTemplateSpec.Spec.RestartPolicy = corev1.RestartPolicy(spec.RestartPolicy) - } -} diff --git a/pkg/controller.v1/mpi/mpijob_controller_test.go b/pkg/controller.v1/mpi/mpijob_controller_test.go deleted file mode 100644 index 024cbaaec1..0000000000 --- a/pkg/controller.v1/mpi/mpijob_controller_test.go +++ /dev/null @@ -1,1150 +0,0 @@ -// Copyright 2021 The Kubeflow Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mpi - -import ( - "context" - "fmt" - "strings" - - common "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/utils/ptr" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - commonutil "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -const ( - gpuResourceName = "nvidia.com/gpu" - extendedGPUResourceName = "vendor-domain/gpu" -) - -func newMPIJobCommon(name string, startTime, completionTime *metav1.Time) *kubeflowv1.MPIJob { - mpiJob := &kubeflowv1.MPIJob{ - TypeMeta: metav1.TypeMeta{APIVersion: kubeflowv1.SchemeGroupVersion.String()}, - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: metav1.NamespaceDefault, - }, - Spec: kubeflowv1.MPIJobSpec{ - RunPolicy: common.RunPolicy{ - CleanPodPolicy: kubeflowv1.CleanPodPolicyPointer(kubeflowv1.CleanPodPolicyAll), - }, - MPIReplicaSpecs: map[common.ReplicaType]*common.ReplicaSpec{ - kubeflowv1.MPIJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "foo", - Image: "bar", - }, - }, - }, - }, - }, - kubeflowv1.MPIJobReplicaTypeLauncher: { - Template: 
corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "foo", - Image: "bar", - }, - }, - }, - }, - }, - }, - }, - Status: common.JobStatus{}, - } - - if startTime != nil { - mpiJob.Status.StartTime = startTime - } - if completionTime != nil { - mpiJob.Status.CompletionTime = completionTime - } - - return mpiJob -} - -func newMPIJobOld(name string, replicas *int32, pusPerReplica int64, resourceName string, startTime, completionTime *metav1.Time) *kubeflowv1.MPIJob { - mpiJob := newMPIJobCommon(name, startTime, completionTime) - - mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeWorker].Replicas = replicas - - workerContainers := mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeWorker].Template.Spec.Containers - for i := range workerContainers { - container := &workerContainers[i] - container.Resources = corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - corev1.ResourceName(resourceName): *resource.NewQuantity(pusPerReplica, resource.DecimalExponent), - }, - } - } - - return mpiJob -} - -var newMPIJob = newMPIJobWithLauncher - -func newMPIJobWithLauncher(name string, replicas *int32, pusPerReplica int64, resourceName string, startTime, completionTime *metav1.Time) *kubeflowv1.MPIJob { - mpiJob := newMPIJobOld(name, replicas, pusPerReplica, resourceName, startTime, completionTime) - - mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Replicas = ptr.To[int32](1) - - launcherContainers := mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.Containers - for i := range launcherContainers { - container := &launcherContainers[i] - container.Resources = corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - corev1.ResourceName(resourceName): *resource.NewQuantity(pusPerReplica, resource.DecimalExponent), - }, - } - } - - return mpiJob -} - -var _ = Describe("MPIJob controller", func() { - Context("Test launcher is GPU launcher", func() { - It("Should pass GPU 
Launcher verification", func() { - By("By creating MPIJobs with various resource configuration") - - testCases := map[string]struct { - gpu string - expected bool - }{ - "isNvidiaGPU": { - gpu: gpuResourceName, - expected: true, - }, - "isExtendedGPU": { - gpu: extendedGPUResourceName, - expected: true, - }, - "notGPU": { - gpu: "vendor-domain/resourcetype", - expected: false, - }, - } - - startTime := metav1.Now() - completionTime := metav1.Now() - - for testName, testCase := range testCases { - mpiJob := newMPIJobWithLauncher("test-"+strings.ToLower(testName), - ptr.To[int32](64), 1, testCase.gpu, &startTime, &completionTime) - Expect(isGPULauncher(mpiJob) == testCase.expected).To(BeTrue()) - } - }) - }) - - Context("Test MPIJob with succeeded launcher Pod", func() { - It("Should contains desired launcher ReplicaStatus", func() { - By("By marking a launcher pod with Phase Succeeded") - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - jobName := "test-launcher-succeeded" - - mpiJob := newMPIJobWithLauncher(jobName, ptr.To[int32](64), 1, gpuResourceName, &startTime, &completionTime) - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - launcher := reconciler.newLauncher(mpiJob, "kubectl-delivery", isGPULauncher(mpiJob)) - launcher.Status.Phase = corev1.PodSucceeded - - launcherKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: launcher.GetName(), - } - Eventually(func() error { - launcherCreated := &corev1.Pod{} - if err := testK8sClient.Get(ctx, launcherKey, launcherCreated); err != nil { - return err - } - launcherCreated.Status.Phase = corev1.PodSucceeded - return testK8sClient.Status().Update(ctx, launcherCreated) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - created := &kubeflowv1.MPIJob{} - launcherStatus := &common.ReplicaStatus{ - Active: 0, - Succeeded: 1, - Failed: 0, - } - Eventually(func() bool { - err := testK8sClient.Get(ctx, types.NamespacedName{Namespace: 
metav1.NamespaceDefault, Name: jobName}, created) - if err != nil { - return false - } - return ReplicaStatusMatch(created.Status.ReplicaStatuses, kubeflowv1.MPIJobReplicaTypeLauncher, launcherStatus) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - }) - }) - - Context("Test MPIJob with failed launcher Pod", func() { - It("Should contains desired launcher ReplicaStatus", func() { - By("By marking a launcher pod with Phase Failed") - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - jobName := "test-launcher-failed" - - mpiJob := newMPIJobWithLauncher(jobName, ptr.To[int32](64), 1, gpuResourceName, &startTime, &completionTime) - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - launcher := reconciler.newLauncher(mpiJob, "kubectl-delivery", isGPULauncher(mpiJob)) - launcherKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: launcher.GetName(), - } - Eventually(func() error { - launcherCreated := &corev1.Pod{} - if err := testK8sClient.Get(ctx, launcherKey, launcherCreated); err != nil { - return err - } - launcherCreated.Status.Phase = corev1.PodFailed - return testK8sClient.Status().Update(ctx, launcherCreated) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - launcherStatus := &common.ReplicaStatus{ - Active: 0, - Succeeded: 0, - Failed: 1, - } - created := &kubeflowv1.MPIJob{} - Eventually(func() bool { - err := testK8sClient.Get(ctx, types.NamespacedName{Namespace: metav1.NamespaceDefault, Name: jobName}, created) - if err != nil { - return false - } - return ReplicaStatusMatch(created.Status.ReplicaStatuses, kubeflowv1.MPIJobReplicaTypeLauncher, launcherStatus) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - }) - }) - - Context("Test MPIJob with succeeded launcher pod", func() { - It("Should contain desired ReplicaStatuses for worker", func() { - By("By marking the launcher Pod as Succeeded") - ctx := context.Background() - startTime := 
metav1.Now() - completionTime := metav1.Now() - - jobName := "test-launcher-succeeded2" - - mpiJob := newMPIJobWithLauncher(jobName, ptr.To[int32](64), 1, gpuResourceName, &startTime, &completionTime) - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - launcher := reconciler.newLauncher(mpiJob, "kubectl-delivery", isGPULauncher(mpiJob)) - launcher.Status.Phase = corev1.PodSucceeded - - launcherKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: launcher.GetName(), - } - Eventually(func() error { - launcherCreated := &corev1.Pod{} - if err := testK8sClient.Get(ctx, launcherKey, launcherCreated); err != nil { - return err - } - launcherCreated.Status.Phase = corev1.PodSucceeded - return testK8sClient.Status().Update(ctx, launcherCreated) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - created := &kubeflowv1.MPIJob{} - launcherStatus := &common.ReplicaStatus{ - Active: 0, - Succeeded: 0, - Failed: 0, - } - Eventually(func() bool { - err := testK8sClient.Get(ctx, types.NamespacedName{Namespace: metav1.NamespaceDefault, Name: jobName}, created) - if err != nil { - return false - } - return ReplicaStatusMatch(created.Status.ReplicaStatuses, kubeflowv1.MPIJobReplicaTypeWorker, launcherStatus) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - }) - }) - - Context("Test MPIJob with Running launcher Pod and Pending worker Pods", func() { - It("Should contain desired ReplicaStatuses", func() { - By("By marking an active launcher pod and pending worker pods") - - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - jobName := "test-launcher-running-worker-pending" - - var replicas int32 = 8 - mpiJob := newMPIJobWithLauncher(jobName, &replicas, 1, gpuResourceName, &startTime, &completionTime) - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - launcher := reconciler.newLauncher(mpiJob, "kubectl-delivery", isGPULauncher(mpiJob)) - launcherKey := 
types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: launcher.GetName(), - } - Eventually(func() error { - launcherCreated := &corev1.Pod{} - if err := testK8sClient.Get(ctx, launcherKey, launcherCreated); err != nil { - return err - } - launcherCreated.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, launcherCreated) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - for i := 0; i < int(replicas); i++ { - name := fmt.Sprintf("%s-%d", mpiJob.Name+workerSuffix, i) - worker := reconciler.newWorker(mpiJob, name) - workerKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: worker.GetName(), - } - Eventually(func() error { - workerCreated := &corev1.Pod{} - if err := testK8sClient.Get(ctx, workerKey, workerCreated); err != nil { - return err - } - workerCreated.Status.Phase = corev1.PodPending - return testK8sClient.Status().Update(ctx, workerCreated) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - } - - key := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: jobName, - } - launcherStatus := &common.ReplicaStatus{ - Active: 1, - Succeeded: 0, - Failed: 0, - } - workerStatus := &common.ReplicaStatus{ - Active: 0, - Succeeded: 0, - Failed: 0, - } - Eventually(func() bool { - created := &kubeflowv1.MPIJob{} - err := testK8sClient.Get(ctx, key, created) - if err != nil { - return false - } - return ReplicaStatusMatch(created.Status.ReplicaStatuses, kubeflowv1.MPIJobReplicaTypeLauncher, - launcherStatus) && ReplicaStatusMatch(created.Status.ReplicaStatuses, kubeflowv1.MPIJobReplicaTypeWorker, - workerStatus) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - }) - }) - - Context("Test MPIJob with Running launcher Pod and Running worker Pods", func() { - It("Should contain desired ReplicaStatuses", func() { - By("By creating an active launcher pod and active worker pods") - - ctx := context.Background() - startTime := metav1.Now() - completionTime := 
metav1.Now() - - jobName := "test-launcher-running-worker-running" - - var replicas int32 = 8 - mpiJob := newMPIJob(jobName, &replicas, 1, gpuResourceName, &startTime, &completionTime) - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - launcher := reconciler.newLauncher(mpiJob, "kubectl-delivery", isGPULauncher(mpiJob)) - launcherKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: launcher.GetName(), - } - Eventually(func() error { - launcherCreated := &corev1.Pod{} - if err := testK8sClient.Get(ctx, launcherKey, launcherCreated); err != nil { - return err - } - launcherCreated.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, launcherCreated) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - for i := 0; i < int(replicas); i++ { - name := fmt.Sprintf("%s-%d", mpiJob.Name+workerSuffix, i) - worker := reconciler.newWorker(mpiJob, name) - workerKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: worker.GetName(), - } - Eventually(func() error { - workerCreated := &corev1.Pod{} - if err := testK8sClient.Get(ctx, workerKey, workerCreated); err != nil { - return err - } - workerCreated.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerCreated) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - } - - key := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: jobName, - } - launcherStatus := &common.ReplicaStatus{ - Active: 1, - Succeeded: 0, - Failed: 0, - } - workerStatus := &common.ReplicaStatus{ - Active: 8, - Succeeded: 0, - Failed: 0, - } - Eventually(func() bool { - created := &kubeflowv1.MPIJob{} - err := testK8sClient.Get(ctx, key, created) - if err != nil { - return false - } - return ReplicaStatusMatch(created.Status.ReplicaStatuses, kubeflowv1.MPIJobReplicaTypeLauncher, - launcherStatus) && ReplicaStatusMatch(created.Status.ReplicaStatuses, kubeflowv1.MPIJobReplicaTypeWorker, - workerStatus) - }, 
testutil.Timeout, testutil.Interval).Should(BeTrue()) - }) - }) - - Context("Test MPIJob with Running worker Pods", func() { - It("Should contain desired ReplicaStatuses and create a launcher pod", func() { - By("By creating only active worker pods") - - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - jobName := "test-worker-running" - - var replicas int32 = 16 - mpiJob := newMPIJob(jobName, &replicas, 1, gpuResourceName, &startTime, &completionTime) - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - for i := 0; i < int(replicas); i++ { - name := fmt.Sprintf("%s-%d", mpiJob.Name+workerSuffix, i) - worker := reconciler.newWorker(mpiJob, name) - workerKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: worker.GetName(), - } - Eventually(func() error { - workerCreated := &corev1.Pod{} - if err := testK8sClient.Get(ctx, workerKey, workerCreated); err != nil { - return err - } - workerCreated.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerCreated) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - } - - launcherKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: mpiJob.Name + launcherSuffix, - } - launcher := &kubeflowv1.MPIJob{} - Eventually(func() bool { - err := testK8sClient.Get(ctx, launcherKey, launcher) - return err != nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - key := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: jobName, - } - launcherStatus := &common.ReplicaStatus{ - Active: 0, - Succeeded: 0, - Failed: 0, - } - workerStatus := &common.ReplicaStatus{ - Active: 16, - Succeeded: 0, - Failed: 0, - } - Eventually(func() bool { - created := &kubeflowv1.MPIJob{} - err := testK8sClient.Get(ctx, key, created) - if err != nil { - return false - } - return ReplicaStatusMatch(created.Status.ReplicaStatuses, kubeflowv1.MPIJobReplicaTypeLauncher, - launcherStatus) && 
ReplicaStatusMatch(created.Status.ReplicaStatuses, kubeflowv1.MPIJobReplicaTypeWorker, - workerStatus) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - }) - }) - - Context("MPIJob not found", func() { - It("Should do nothing", func() { - By("Calling Reconcile method") - jobName := "test-not-exist" - - ctx := context.Background() - - req := ctrl.Request{NamespacedName: types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: jobName, - }} - _, err := reconciler.Reconcile(ctx, req) - Expect(err).Should(BeNil()) - }) - }) - - Context("MPI Job succeeds with predefined service account", func() { - It("should run with the defined service account", func() { - By("Calling Reconcile method") - jobName := "test-sa-orphan" - launcherSaName := "launcher-sa" - - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - mpiJob := newMPIJob(jobName, ptr.To[int32](64), 1, gpuResourceName, &startTime, &completionTime) - mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.ServiceAccountName = launcherSaName - sa := newLauncherServiceAccount(mpiJob) - sa.OwnerReferences = nil - - Expect(sa.Name).Should(Equal(launcherSaName)) - Expect(testK8sClient.Create(ctx, sa)).Should(Succeed()) - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - Eventually(func() error { - req := ctrl.Request{NamespacedName: types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: mpiJob.GetName(), - }} - - _, err := reconciler.Reconcile(ctx, req) - - if err != nil { - return err - } - - return nil - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - Eventually(func() string { - launcherCreated := &corev1.Pod{} - - launcherKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: mpiJob.Name + launcherSuffix, - } - - if err := testK8sClient.Get(ctx, launcherKey, launcherCreated); err != nil { - return "" - } - - return launcherCreated.Spec.ServiceAccountName - }, 
testutil.Timeout, testutil.Interval).Should(Equal(launcherSaName)) - }) - }) - - Context("MPIJob with launcher Pod not controlled by itself", func() { - It("Should return error", func() { - By("Calling Reconcile method") - jobName := "test-launcher-orphan" - testKind := "Pod" - - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - mpiJob := newMPIJob(jobName, ptr.To[int32](64), 1, gpuResourceName, &startTime, &completionTime) - - launcher := reconciler.newLauncher(mpiJob, "kubectl-delivery", isGPULauncher(mpiJob)) - launcher.OwnerReferences = nil - Expect(testK8sClient.Create(ctx, launcher)).Should(Succeed()) - - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - req := ctrl.Request{NamespacedName: types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: mpiJob.GetName(), - }} - expectedErr := fmt.Errorf(MessageResourceExists, launcher.Name, testKind) - Eventually(func() error { - _, err := reconciler.Reconcile(ctx, req) - return err - }, testutil.Timeout, testutil.Interval).Should(MatchError(expectedErr)) - }) - }) - - Context("MPIJob with worker Pod not controlled by itself", func() { - It("Should return error", func() { - By("Calling Reconcile method") - jobName := "test-worker-orphan" - testKind := "Pod" - - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - mpiJob := newMPIJob(jobName, ptr.To[int32](1), 1, gpuResourceName, &startTime, &completionTime) - - for i := 0; i < 1; i++ { - name := fmt.Sprintf("%s-%d", mpiJob.Name+workerSuffix, i) - worker := reconciler.newWorker(mpiJob, name) - worker.OwnerReferences = nil - Expect(testK8sClient.Create(ctx, worker)).Should(Succeed()) - } - - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - req := ctrl.Request{NamespacedName: types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: mpiJob.GetName(), - }} - expectedErr := fmt.Errorf(MessageResourceExists, fmt.Sprintf("%s-%d", 
mpiJob.Name+workerSuffix, 0), testKind) - Eventually(func() error { - _, err := reconciler.Reconcile(ctx, req) - return err - }, testutil.Timeout, testutil.Interval).Should(MatchError(expectedErr)) - }) - }) - - Context("MPIJob with ConfigMap not controlled by itself", func() { - It("Should return error", func() { - By("Calling Reconcile method") - jobName := "test-cm-orphan" - testKind := "ConfigMap" - - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - mpiJob := newMPIJob(jobName, ptr.To[int32](64), 1, gpuResourceName, &startTime, &completionTime) - - cm := newConfigMap(mpiJob, 64, isGPULauncher(mpiJob)) - cm.OwnerReferences = nil - Expect(testK8sClient.Create(ctx, cm)).Should(Succeed()) - - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - req := ctrl.Request{NamespacedName: types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: mpiJob.GetName(), - }} - expectedErr := fmt.Errorf(MessageResourceExists, cm.Name, testKind) - Eventually(func() error { - _, err := reconciler.Reconcile(ctx, req) - return err - }, testutil.Timeout, testutil.Interval).Should(MatchError(expectedErr)) - }) - }) - - Context("MPIJob with Role not controlled by itself", func() { - It("Should return error", func() { - By("Calling Reconcile method") - jobName := "test-role-orphan" - testKind := "Role" - - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - mpiJob := newMPIJob(jobName, ptr.To[int32](64), 1, gpuResourceName, &startTime, &completionTime) - - role := newLauncherRole(mpiJob, 64) - role.OwnerReferences = nil - Expect(testK8sClient.Create(ctx, role)).Should(Succeed()) - - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - req := ctrl.Request{NamespacedName: types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: mpiJob.GetName(), - }} - expectedErr := fmt.Errorf(MessageResourceExists, role.Name, testKind) - Eventually(func() error { - _, err := 
reconciler.Reconcile(ctx, req) - return err - }, testutil.Timeout, testutil.Interval).Should(MatchError(expectedErr)) - }) - }) - - Context("MPIJob with RoleBinding not controlled by itself", func() { - It("Should return error", func() { - By("Calling Reconcile method") - jobName := "test-rb-orphan" - testKind := "RoleBinding" - - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - mpiJob := newMPIJob(jobName, ptr.To[int32](64), 1, gpuResourceName, &startTime, &completionTime) - - rb := newLauncherRoleBinding(mpiJob) - rb.OwnerReferences = nil - Expect(testK8sClient.Create(ctx, rb)).Should(Succeed()) - - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - req := ctrl.Request{NamespacedName: types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: mpiJob.GetName(), - }} - expectedErr := fmt.Errorf(MessageResourceExists, rb.Name, testKind) - Eventually(func() error { - _, err := reconciler.Reconcile(ctx, req) - return err - }, testutil.Timeout, testutil.Interval).Should(MatchError(expectedErr)) - }) - }) - - Context("Test launcher's Intel MPI handling", func() { - It("Should create a launcher job with Intel MPI env variables", func() { - By("By creating MPIJobs with and without preset env variables") - - testCases := map[string]struct { - envVariables map[string]string - expectedEnvVariables map[string]string - }{ - "withoutIMPIValues": { - envVariables: map[string]string{ - "X_MPI_HYDRA_BOOTSTRAP": "foo", - }, - expectedEnvVariables: map[string]string{ - "I_MPI_HYDRA_BOOTSTRAP": iMPIDefaultBootstrap, - "I_MPI_HYDRA_BOOTSTRAP_EXEC": fmt.Sprintf("%s/%s", configMountPath, kubexecScriptName), - }, - }, - "withIMPIBootstrap": { - envVariables: map[string]string{ - "I_MPI_HYDRA_BOOTSTRAP": "RSH", - }, - expectedEnvVariables: map[string]string{ - "I_MPI_HYDRA_BOOTSTRAP": "RSH", - "I_MPI_HYDRA_BOOTSTRAP_EXEC": fmt.Sprintf("%s/%s", configMountPath, kubexecScriptName), - }, - }, - "withIMPIBootstrapExec": { - 
envVariables: map[string]string{ - "I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh", - }, - expectedEnvVariables: map[string]string{ - "I_MPI_HYDRA_BOOTSTRAP": iMPIDefaultBootstrap, - "I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh", - }, - }, - "withIMPIBootstrapAndExec": { - envVariables: map[string]string{ - "I_MPI_HYDRA_BOOTSTRAP": "RSH", - "I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh", - }, - expectedEnvVariables: map[string]string{ - "I_MPI_HYDRA_BOOTSTRAP": "RSH", - "I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh", - }, - }, - } - - for testName, testCase := range testCases { - ctx := context.Background() - startTime := metav1.Now() - completionTime := metav1.Now() - - jobName := "test-launcher-creation-" + strings.ToLower(testName) - - mpiJob := newMPIJob(jobName, ptr.To[int32](1), 1, gpuResourceName, &startTime, &completionTime) - Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed()) - - template := &mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template - Expect(len(template.Spec.Containers) == 1).To(BeTrue()) - - cont := &template.Spec.Containers[0] - - for k, v := range testCase.envVariables { - cont.Env = append(cont.Env, - corev1.EnvVar{ - Name: k, - Value: v, - }, - ) - } - - launcher := reconciler.newLauncher(mpiJob, "kubectl-delivery", false) - - Expect(len(launcher.Spec.Containers) == 1).To(BeTrue()) - for expectedKey, expectedValue := range testCase.expectedEnvVariables { - Expect(launcher.Spec.Containers[0].Env).Should(ContainElements( - corev1.EnvVar{ - Name: expectedKey, - Value: expectedValue, - }), - ) - } - } - }) - }) - - Context("When creating the MPIJob with the suspend semantics", func() { - const name = "test-job" - var ( - ns *corev1.Namespace - job *kubeflowv1.MPIJob - jobKey types.NamespacedName - launcherKey types.NamespacedName - worker0Key types.NamespacedName - ctx = context.Background() - ) - BeforeEach(func() { - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "mpijob-test-", - }, - } - 
Expect(testK8sClient.Create(ctx, ns)).Should(Succeed()) - - now := metav1.Now() - job = newMPIJob(name, ptr.To[int32](1), 1, gpuResourceName, &now, &now) - job.Namespace = ns.Name - jobKey = client.ObjectKeyFromObject(job) - launcherKey = types.NamespacedName{ - Name: fmt.Sprintf("%s-launcher", name), - Namespace: ns.Name, - } - worker0Key = types.NamespacedName{ - Name: fmt.Sprintf("%s-worker-0", name), - Namespace: ns.Name, - } - }) - AfterEach(func() { - Expect(testK8sClient.Delete(ctx, job)).Should(Succeed()) - Expect(testK8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - It("Shouldn't create resources if MPIJob is suspended", func() { - By("By creating a new MPIJob with suspend=true") - job.Spec.RunPolicy.Suspend = ptr.To(true) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.MPIJob{} - launcherPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - - By("Checking created MPIJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - By("Checking created MPIJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods aren't created") - Consistently(func() bool { - errLauncherPod := testK8sClient.Get(ctx, launcherKey, launcherPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - return errors.IsNotFound(errLauncherPod) && errors.IsNotFound(errWorkerPod) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the MPIJob has suspended condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.ConsistentDuration, 
testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("MPIJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("MPIJob %s is suspended.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - }) - - It("Should delete resources after MPIJob is suspended; Should resume MPIJob after MPIJob is unsuspended", func() { - By("By creating a new MPIJob") - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.MPIJob{} - launcherPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - - // We'll need to retry getting this newly created MPIJob, given that creation may not immediately happen. - By("Checking created MPIJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - var startTimeBeforeSuspended *metav1.Time - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - startTimeBeforeSuspended = created.Status.StartTime - return startTimeBeforeSuspended - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Checking the created pods") - Eventually(func() bool { - errLauncher := testK8sClient.Get(ctx, launcherKey, launcherPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errLauncher == nil && errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Updating the Pod's phase with Running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, launcherKey, launcherPod)).Should(Succeed()) - launcherPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, launcherPod) - 
}, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking the MPIJob's condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("MPIJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("MPIJob %s is running.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Updating the MPIJob with suspend=true") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(true) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the pods are removed") - Eventually(func() bool { - errLauncher := testK8sClient.Get(ctx, launcherKey, launcherPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errors.IsNotFound(errLauncher) && errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - errLauncherPod := testK8sClient.Get(ctx, launcherKey, launcherPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - return errors.IsNotFound(errLauncherPod) && errors.IsNotFound(errWorkerPod) - }, testutil.ConsistentDuration, 
testutil.Interval).Should(BeTrue()) - - By("Checking if the MPIJob has a suspended condition") - Eventually(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeLauncher].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeLauncher].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.MPIJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - Expect(created.Status.Conditions).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("MPIJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionFalse, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("MPIJob %s is suspended.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("MPIJob %s is suspended.", name), - Status: corev1.ConditionTrue, - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Unsuspending the MPIJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() *metav1.Time { - 
Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Check if the pods are created") - Eventually(func() error { - return testK8sClient.Get(ctx, launcherKey, launcherPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - By("Updating Pod's condition with Running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, launcherKey, launcherPod)).Should(Succeed()) - launcherPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, launcherPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the MPIJob has resumed conditions") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("MPIJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobResumedReason), - Message: fmt.Sprintf("MPIJob %s is resumed.", name), - Status: corev1.ConditionFalse, - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.MPIJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("MPIJob %s is running.", name), - }, 
- }, testutil.IgnoreJobConditionsTimes)) - - By("Checking if the startTime is updated") - Expect(created.Status.StartTime).ShouldNot(Equal(startTimeBeforeSuspended)) - }) - - It("Should not reconcile a job while managed by external controller", func() { - By("Creating a MPIJob managed by external controller") - job.Spec.RunPolicy = kubeflowv1.RunPolicy{ - ManagedBy: ptr.To(kubeflowv1.MultiKueueController), - } - job.Spec.RunPolicy.Suspend = ptr.To(true) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.MPIJob{} - By("Checking created MPIJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Checking created MPIJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - launcherPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - launcherSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - errMasterPod := testK8sClient.Get(ctx, launcherKey, launcherPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, launcherKey, launcherSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue(), "pods and services should be created by external controller (here not existent)") - - By("Checking if the MPIJob status was not updated") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, 
testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - - By("Unsuspending the MPIJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking created MPIJob still has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the MPIJob status was not updated, even after unsuspending") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - }) - }) -}) - -func ReplicaStatusMatch(replicaStatuses map[common.ReplicaType]*common.ReplicaStatus, - replicaType common.ReplicaType, status *common.ReplicaStatus) bool { - - result := true - - if replicaStatuses == nil { - return false - } - if val, exist := replicaStatuses[replicaType]; !exist { - return false - } else { - result = result && (val.Active == status.Active) - result = result && (val.Succeeded == status.Succeeded) - result = result && (val.Failed == status.Failed) - } - - return result -} diff --git a/pkg/controller.v1/mpi/suite_test.go b/pkg/controller.v1/mpi/suite_test.go deleted file mode 100644 index 1335c73c48..0000000000 --- a/pkg/controller.v1/mpi/suite_test.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mpi - -import ( - "context" - "path/filepath" - "testing" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/config" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - "k8s.io/client-go/kubernetes/scheme" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" - //+kubebuilder:scaffold:imports -) - -// These tests use Ginkgo (BDD-style Go testing framework). Refer to -// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
- -var ( - testK8sClient client.Client - testEnv *envtest.Environment - testCtx context.Context - testCancel context.CancelFunc - reconciler *MPIJobReconciler -) - -func TestAPIs(t *testing.T) { - RegisterFailHandler(Fail) - - RunSpecs(t, "Controller Suite") -} - -var _ = BeforeSuite(func() { - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - - testCtx, testCancel = context.WithCancel(context.TODO()) - - By("bootstrapping test environment") - testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "manifests", "base", "crds")}, - ErrorIfCRDPathMissing: true, - } - - cfg, err := testEnv.Start() - Expect(err).NotTo(HaveOccurred()) - Expect(cfg).NotTo(BeNil()) - - err = v1beta1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - err = kubeflowv1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - // Set Default kubectl delivery image - config.Config.MPIKubectlDeliveryImage = config.MPIKubectlDeliveryImageDefault - - //+kubebuilder:scaffold:scheme - - testK8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) - Expect(err).NotTo(HaveOccurred()) - Expect(testK8sClient).NotTo(BeNil()) - - mgr, err := ctrl.NewManager(cfg, ctrl.Options{ - Metrics: metricsserver.Options{ - BindAddress: "0", - }, - }) - Expect(err).NotTo(HaveOccurred()) - - gangSchedulingSetupFunc := common.GenNonGangSchedulerSetupFunc() - reconciler = NewReconciler(mgr, gangSchedulingSetupFunc) - Expect(reconciler.SetupWithManager(mgr, 1)).NotTo(HaveOccurred()) - - go func() { - defer GinkgoRecover() - err = mgr.Start(testCtx) - Expect(err).ToNot(HaveOccurred(), "failed to run manager") - }() -}) - -var _ = AfterSuite(func() { - By("tearing down the test environment") - testCancel() - err := testEnv.Stop() - Expect(err).NotTo(HaveOccurred()) -}) diff --git a/pkg/controller.v1/paddlepaddle/envvar.go b/pkg/controller.v1/paddlepaddle/envvar.go deleted file mode 100644 index 2bcd379642..0000000000 --- 
a/pkg/controller.v1/paddlepaddle/envvar.go +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2022 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -package paddle - -import ( - "fmt" - "strconv" - "strings" - - corev1 "k8s.io/api/core/v1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -const ( - EnvMasterEndpoint = "PADDLE_MASTER" - EnvNumNodes = "PADDLE_NNODES" - EnvJobID = "PADDLE_JOB_ID" - EnvServerNum = "PADDLE_SERVER_NUM" - EnvTrainerNum = "PADDLE_TRAINER_NUM" -) - -// EnvVarGenerator is the environment variable generator interface. -type EnvVarGenerator interface { - Generate(job *kubeflowv1.PaddleJob) ([]corev1.EnvVar, error) -} - -func setPodEnv(obj interface{}, podTemplateSpec *corev1.PodTemplateSpec, rtype, index string) error { - paddlejob, ok := obj.(*kubeflowv1.PaddleJob) - if !ok { - return fmt.Errorf("%+v is not a type of PaddleJob", obj) - } - - rank, err := strconv.Atoi(index) - if err != nil { - return err - } - - totalReplicas := getTotalReplicas(paddlejob) - - for i := range podTemplateSpec.Spec.Containers { - // Initialize the environment variables. - if len(podTemplateSpec.Spec.Containers[i].Env) == 0 { - podTemplateSpec.Spec.Containers[i].Env = make([]corev1.EnvVar, 0) - } - // Set PYTHONUNBUFFERED to true, to disable output buffering. - // Ref https://stackoverflow.com/questions/59812009/what-is-the-use-of-pythonunbuffered-in-docker-file. 
- podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "PYTHONUNBUFFERED", - Value: "1", - }) - - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvJobID, - Value: paddlejob.Name, - }) - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvNumNodes, - Value: strconv.Itoa(int(totalReplicas)), - }) - - // If the master is null, run in Collective mode - if paddlejob.Spec.PaddleReplicaSpecs[kubeflowv1.PaddleJobReplicaTypeMaster] == nil { - - // We pick the worker 0 as the rendezvous endpoint - masterAddr := replicaName(paddlejob.Name, kubeflowv1.PaddleJobReplicaTypeWorker, 0) - masterPort := getPortFromPaddleJob(paddlejob, kubeflowv1.PaddleJobReplicaTypeWorker) - if rank == 0 { - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "POD_IP_DUMMY", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "status.podIP", - }, - }, - }) - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvMasterEndpoint, - Value: fmt.Sprintf("$(POD_IP_DUMMY):%d", masterPort), - }) - } else { - // NOTE(kuizhiqing): no need to ensure master ready by initcontainer or alternative methods, paddle launch will handle it. 
- podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvMasterEndpoint, - Value: fmt.Sprintf("%s:%d", masterAddr, masterPort), - }) - } - - // Otherwise, run in PS mode - } else { - - // We pick the master 0 as the rendezvous endpoint - masterAddr := replicaName(paddlejob.Name, kubeflowv1.PaddleJobReplicaTypeMaster, 0) - masterPort := getPortFromPaddleJob(paddlejob, kubeflowv1.PaddleJobReplicaTypeMaster) - if rank == 0 && rtype == strings.ToLower(string(kubeflowv1.PaddleJobReplicaTypeMaster)) { - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "POD_IP_DUMMY", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "status.podIP", - }, - }, - }) - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvMasterEndpoint, - Value: fmt.Sprintf("$(POD_IP_DUMMY):%d", masterPort), - }) - } else { - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvMasterEndpoint, - Value: fmt.Sprintf("%s:%d", masterAddr, masterPort), - }) - } - - // Each pod will have only one server or trainer. 
- if rtype == strings.ToLower(string(kubeflowv1.PaddleJobReplicaTypeMaster)) { - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvServerNum, - Value: "1", - }) - } else { - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvTrainerNum, - Value: "1", - }) - } - - } - } - - return nil -} - -func getTotalReplicas(job *kubeflowv1.PaddleJob) int32 { - jobReplicas := int32(0) - for _, r := range job.Spec.PaddleReplicaSpecs { - jobReplicas += *r.Replicas - } - return jobReplicas -} - -func replicaName(jobName string, rtype kubeflowv1.ReplicaType, index int) string { - n := jobName + "-" + strings.ToLower(string(rtype)) + "-" + strconv.Itoa(index) - return strings.Replace(n, "/", "-", -1) -} - -func getPortFromPaddleJob(job *kubeflowv1.PaddleJob, rtype kubeflowv1.ReplicaType) int32 { - containers := job.Spec.PaddleReplicaSpecs[rtype].Template.Spec.Containers - for _, container := range containers { - if container.Name == kubeflowv1.PaddleJobDefaultContainerName { - ports := container.Ports - for _, port := range ports { - if port.Name == kubeflowv1.PaddleJobDefaultPortName { - return port.ContainerPort - } - } - } - } - return kubeflowv1.PaddleJobDefaultPort -} diff --git a/pkg/controller.v1/paddlepaddle/paddlepaddle_controller.go b/pkg/controller.v1/paddlepaddle/paddlepaddle_controller.go deleted file mode 100644 index b3aa54c3f4..0000000000 --- a/pkg/controller.v1/paddlepaddle/paddlepaddle_controller.go +++ /dev/null @@ -1,521 +0,0 @@ -// Copyright 2022 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package paddle - -import ( - "context" - "fmt" - "strings" - "time" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" - "github.com/kubeflow/training-operator/pkg/common/util" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - commonutil "github.com/kubeflow/training-operator/pkg/util" - - "github.com/go-logr/logr" - "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/client-go/informers" - kubeclientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/record" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/source" - schedulerpluginsv1alpha1 
"sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" -) - -const ( - controllerName = "paddlejob-controller" -) - -// NewReconciler creates a PaddleJob Reconciler -func NewReconciler(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc) *PaddleJobReconciler { - r := &PaddleJobReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - recorder: mgr.GetEventRecorderFor(controllerName), - apiReader: mgr.GetAPIReader(), - Log: log.Log, - } - - // Create clients - cfg := mgr.GetConfig() - kubeClientSet := kubeclientset.NewForConfigOrDie(cfg) - sharedInformers := informers.NewSharedInformerFactory(kubeClientSet, 0) - priorityClassInformer := sharedInformers.Scheduling().V1().PriorityClasses() - - // Initialize common job controller - r.JobController = common.JobController{ - Controller: r, - Expectations: expectation.NewControllerExpectations(), - WorkQueue: &util.FakeWorkQueue[string]{}, - Recorder: r.recorder, - KubeClientSet: kubeClientSet, - PriorityClassLister: priorityClassInformer.Lister(), - PriorityClassInformerSynced: priorityClassInformer.Informer().HasSynced, - PodControl: control.RealPodControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - ServiceControl: control.RealServiceControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - } - - gangSchedulingSetupFunc(&r.JobController) - - return r -} - -// PaddleJobReconciler reconciles a PaddleJob object -type PaddleJobReconciler struct { - common.JobController - client.Client - Scheme *runtime.Scheme - Log logr.Logger - recorder record.EventRecorder - apiReader client.Reader -} - -// +kubebuilder:rbac:groups=kubeflow.org,resources=paddlejobs,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=kubeflow.org,resources=paddlejobs/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=kubeflow.org,resources=paddlejobs/finalizers,verbs=update -// 
+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;delete -// +kubebuilder:rbac:groups=scheduling.volcano.sh,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=scheduling.x-k8s.io,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch;delete - -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// the PaddleJob object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.8.3/pkg/reconcile -func (r *PaddleJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - logger := r.Log.WithValues(kubeflowv1.PaddleJobSingular, req.NamespacedName) - - paddlejob := &kubeflowv1.PaddleJob{} - err := r.Get(ctx, req.NamespacedName, paddlejob) - if err != nil { - logger.Info(err.Error(), "unable to fetch PaddleJob", req.NamespacedName.String()) - return ctrl.Result{}, client.IgnoreNotFound(err) - } - - if manager := r.ManagedByExternalController(paddlejob.Spec.RunPolicy.ManagedBy); manager != nil { - logger.Info("Skipping PaddleJob managed by a custom controller", "managed-by", manager) - return ctrl.Result{}, nil - } - - // Check if reconciliation is needed - jobKey, err := common.KeyFunc(paddlejob) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get jobKey for job object %#v: %v", paddlejob, err)) - } - - replicaTypes := util.GetReplicaTypes(paddlejob.Spec.PaddleReplicaSpecs) - needReconcile := util.SatisfiedExpectations(r.Expectations, jobKey, 
replicaTypes) - - if !needReconcile || paddlejob.GetDeletionTimestamp() != nil { - logger.Info("reconcile cancelled, job does not need to do reconcile or has been deleted", - "sync", needReconcile, "deleted", paddlejob.GetDeletionTimestamp() != nil) - return ctrl.Result{}, nil - } - - // Set default priorities to paddle job - r.Scheme.Default(paddlejob) - - // Use common to reconcile the job related pod and service - err = r.ReconcileJobs(paddlejob, paddlejob.Spec.PaddleReplicaSpecs, paddlejob.Status, &paddlejob.Spec.RunPolicy) - if err != nil { - logger.Error(err, "Reconcile PaddleJob error") - return ctrl.Result{}, err - } - - t, err := util.DurationUntilExpireTime(&paddlejob.Spec.RunPolicy, paddlejob.Status) - if err != nil { - logrus.Warnf("Reconcile PaddleJob error %v", err) - return ctrl.Result{}, err - } - if t >= 0 { - return ctrl.Result{Requeue: true, RequeueAfter: t}, nil - } - - return ctrl.Result{}, nil -} - -// SetupWithManager sets up the controller with the Manager. -func (r *PaddleJobReconciler) SetupWithManager(mgr ctrl.Manager, controllerThreads int) error { - c, err := controller.New(r.ControllerName(), mgr, controller.Options{ - Reconciler: r, - MaxConcurrentReconciles: controllerThreads, - }) - - if err != nil { - return err - } - - // using onOwnerCreateFunc is easier to set defaults - if err = c.Watch(source.Kind[*kubeflowv1.PaddleJob](mgr.GetCache(), &kubeflowv1.PaddleJob{}, - &handler.TypedEnqueueRequestForObject[*kubeflowv1.PaddleJob]{}, - predicate.TypedFuncs[*kubeflowv1.PaddleJob]{CreateFunc: r.onOwnerCreateFunc()}), - ); err != nil { - return err - } - // inject watching for job related pod - if err = c.Watch(source.Kind[*corev1.Pod](mgr.GetCache(), &corev1.Pod{}, - handler.TypedEnqueueRequestForOwner[*corev1.Pod](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.PaddleJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Pod](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // inject 
watching for job related service - if err = c.Watch(source.Kind[*corev1.Service](mgr.GetCache(), &corev1.Service{}, - handler.TypedEnqueueRequestForOwner[*corev1.Service](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.PaddleJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Service](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // skip watching volcano PodGroup if volcano PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: v1beta1.GroupName, Kind: "PodGroup"}, - v1beta1.SchemeGroupVersion.Version, - ); err == nil { - // inject watching for job related volcano PodGroup - if err = c.Watch(source.Kind[*v1beta1.PodGroup](mgr.GetCache(), &v1beta1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*v1beta1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.PaddleJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*v1beta1.PodGroup](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - } - // skip watching scheduler-plugins PodGroup if scheduler-plugins PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping( - schema.GroupKind{Group: schedulerpluginsv1alpha1.SchemeGroupVersion.Group, Kind: "PodGroup"}, - schedulerpluginsv1alpha1.SchemeGroupVersion.Version, - ); err == nil { - // inject watching for job related scheduler-plugins PodGroup - if err = c.Watch(source.Kind[*schedulerpluginsv1alpha1.PodGroup](mgr.GetCache(), &schedulerpluginsv1alpha1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*schedulerpluginsv1alpha1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.PaddleJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*schedulerpluginsv1alpha1.PodGroup](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - } - - return nil -} - -func (r *PaddleJobReconciler) ControllerName() string { - return controllerName -} - -func (r *PaddleJobReconciler) 
GetAPIGroupVersionKind() schema.GroupVersionKind { - return kubeflowv1.GroupVersion.WithKind(kubeflowv1.PaddleJobKind) -} - -func (r *PaddleJobReconciler) GetAPIGroupVersion() schema.GroupVersion { - return kubeflowv1.GroupVersion -} - -func (r *PaddleJobReconciler) GetGroupNameLabelValue() string { - return kubeflowv1.GroupVersion.Group -} - -func (r *PaddleJobReconciler) GetFrameworkName() string { - return kubeflowv1.PaddleJobFrameworkName -} - -func (r *PaddleJobReconciler) GetJobFromInformerCache(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.PaddleJob{} - err := r.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if errors.IsNotFound(err) { - logrus.Error(err, "paddle job not found", "namespace", namespace, "name", name) - } else { - logrus.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -func (r *PaddleJobReconciler) GetJobFromAPIClient(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.PaddleJob{} - - err := r.apiReader.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if errors.IsNotFound(err) { - logrus.Error(err, "paddle job not found", "namespace", namespace, "name", name) - } else { - logrus.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -func (r *PaddleJobReconciler) GetPodsForJob(obj interface{}) ([]*corev1.Pod, error) { - job, err := meta.Accessor(obj) - if err != nil { - return nil, err - } - - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. 
- podlist := &corev1.PodList{} - err = r.List(context.Background(), podlist, client.MatchingLabels(r.GenLabels(job.GetName())), client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - return util.JobControlledPodList(podlist.Items, job), nil -} - -func (r *PaddleJobReconciler) GetServicesForJob(obj interface{}) ([]*corev1.Service, error) { - job, err := meta.Accessor(obj) - if err != nil { - return nil, err - } - - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. - serviceList := &corev1.ServiceList{} - err = r.List(context.Background(), serviceList, client.MatchingLabels(r.GenLabels(job.GetName())), client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - ret := util.ConvertServiceList(serviceList.Items) - return ret, nil -} - -func (r *PaddleJobReconciler) DeleteJob(job interface{}) error { - paddlejob, ok := job.(*kubeflowv1.PaddleJob) - if !ok { - return fmt.Errorf("%+v is not a type of PaddleJob", job) - } - if err := r.Delete(context.Background(), paddlejob); err != nil { - r.recorder.Eventf(paddlejob, corev1.EventTypeWarning, control.FailedDeletePodReason, "Error deleting: %v", err) - logrus.Error(err, "failed to delete job", "namespace", paddlejob.Namespace, "name", paddlejob.Name) - return err - } - r.recorder.Eventf(paddlejob, corev1.EventTypeNormal, control.SuccessfulDeletePodReason, "Deleted job: %v", paddlejob.Name) - logrus.Info("job deleted", "namespace", paddlejob.Namespace, "name", paddlejob.Name) - trainingoperatorcommon.DeletedJobsCounterInc(paddlejob.Namespace, r.GetFrameworkName()) - return nil -} - -func (jc *PaddleJobReconciler) GenLabelSelector(jobName string, - rtype kubeflowv1.ReplicaType) *metav1.LabelSelector { - labels := jc.GenLabels(jobName) - labels[kubeflowv1.ReplicaTypeLabel] = strings.ToLower(string(rtype)) - - return &metav1.LabelSelector{ - MatchLabels: labels, - } -} - -// UpdateJobStatus 
updates the job status and job conditions -func (r *PaddleJobReconciler) UpdateJobStatus(job interface{}, - replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, - jobStatus *kubeflowv1.JobStatus) error { - paddlejob, ok := job.(*kubeflowv1.PaddleJob) - if !ok { - return fmt.Errorf("%+v is not a type of PaddleJob", job) - } - - paddlejobKey, err := common.KeyFunc(paddlejob) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get key for paddlejob object %#v: %v", paddlejob, err)) - return err - } - - logger := commonutil.LoggerForJob(paddlejob) - - // Set StartTime. - if jobStatus.StartTime == nil { - now := metav1.Now() - jobStatus.StartTime = &now - // enqueue a sync to check if job past ActiveDeadlineSeconds - if paddlejob.Spec.RunPolicy.ActiveDeadlineSeconds != nil { - logger.Infof("Job with ActiveDeadlineSeconds will sync after %d seconds", *paddlejob.Spec.RunPolicy.ActiveDeadlineSeconds) - r.WorkQueue.AddAfter(paddlejobKey, time.Duration(*paddlejob.Spec.RunPolicy.ActiveDeadlineSeconds)*time.Second) - } - } - - for rtype, spec := range replicas { - status := jobStatus.ReplicaStatuses[rtype] - // Generate the label selector. 
- status.Selector = metav1.FormatLabelSelector(r.GenLabelSelector(paddlejob.Name, rtype)) - - succeeded := status.Succeeded - expected := *(spec.Replicas) - succeeded - running := status.Active - failed := status.Failed - specReplicas := *spec.Replicas - - logrus.Infof("PaddleJob=%s, ReplicaType=%s expected=%d, running=%d, succeeded=%d, failed=%d, Replicas=%d", - paddlejob.Name, rtype, expected, running, succeeded, failed, specReplicas) - - if ContainsMasterSpec(replicas) { - if rtype == kubeflowv1.PaddleJobReplicaTypeMaster { - if running > 0 { - msg := fmt.Sprintf("PaddleJob %s is running.", paddlejob.Name) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobRunningReason), msg) - } - // when master is succeed, the job is finished. - if expected == 0 { - msg := fmt.Sprintf("PaddleJob %s is successfully completed.", paddlejob.Name) - logrus.Info(msg) - r.Recorder.Event(paddlejob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobSucceededReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobSucceededReason), msg) - trainingoperatorcommon.SuccessfulJobsCounterInc(paddlejob.Namespace, r.GetFrameworkName()) - return nil - } - } - } else { - if rtype == kubeflowv1.PaddleJobReplicaTypeWorker { - // TODO(gaocegege): Support SuccessPolicy - if expected == 0 { - msg := fmt.Sprintf("PaddleJob %s/%s successfully completed.", - paddlejob.Namespace, paddlejob.Name) - r.recorder.Event(paddlejob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobSucceededReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - 
commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobSucceededReason), msg) - trainingoperatorcommon.SuccessfulJobsCounterInc(paddlejob.Namespace, r.GetFrameworkName()) - } else if running > 0 { - // Some workers are still running, leave a running condition. - msg := fmt.Sprintf("PaddleJob %s/%s is running.", - paddlejob.Namespace, paddlejob.Name) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobRunningReason), msg) - } - } - } - - if failed > 0 && (specReplicas > succeeded+running) { - if spec.RestartPolicy != kubeflowv1.RestartPolicyNever { - msg := fmt.Sprintf("PaddleJob %s is restarting because %d %s replica(s) failed.", paddlejob.Name, failed, rtype) - r.Recorder.Event(paddlejob, corev1.EventTypeWarning, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobRestartingReason), msg) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRestarting, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobRestartingReason), msg) - trainingoperatorcommon.RestartedJobsCounterInc(paddlejob.Namespace, r.GetFrameworkName()) - } else { - msg := fmt.Sprintf("PaddleJob %s is failed because %d %s replica(s) failed.", paddlejob.Name, failed, rtype) - r.Recorder.Event(paddlejob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobFailedReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobFailed, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobFailedReason), msg) - trainingoperatorcommon.FailedJobsCounterInc(paddlejob.Namespace, r.GetFrameworkName()) - } - } - } - - return nil -} - -// ContainsMasterSpec returns true if the paddlejob contains master spec. 
-func ContainsMasterSpec(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec) bool { - if _, ok := replicas[kubeflowv1.PaddleJobReplicaTypeMaster]; ok { - return true - } - return false -} - -// UpdateJobStatusInApiServer updates the job status in to cluster. -func (r *PaddleJobReconciler) UpdateJobStatusInApiServer(job interface{}, jobStatus *kubeflowv1.JobStatus) error { - if jobStatus.ReplicaStatuses == nil { - jobStatus.ReplicaStatuses = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus{} - } - - paddlejob, ok := job.(*kubeflowv1.PaddleJob) - trainingoperatorcommon.ClearGeneratedFields(&paddlejob.ObjectMeta) - if !ok { - return fmt.Errorf("%+v is not a type of PaddleJob", job) - } - - // Job status passed in differs with status in job, update in basis of the passed in one. - if !equality.Semantic.DeepEqual(&paddlejob.Status, jobStatus) { - paddlejob = paddlejob.DeepCopy() - paddlejob.Status = *jobStatus.DeepCopy() - } - - result := r.Status().Update(context.Background(), paddlejob) - - if result != nil { - r.Log.WithValues("paddlejob", types.NamespacedName{ - Namespace: paddlejob.GetNamespace(), - Name: paddlejob.GetName(), - }) - return result - } - - return nil -} - -// SetClusterSpec sets the cluster spec and init container for the pod -func (r *PaddleJobReconciler) SetClusterSpec(job interface{}, podTemplate *corev1.PodTemplateSpec, rtype, index string) error { - // TODO - if err := setPodEnv(job, podTemplate, rtype, index); err != nil { - return err - } - return nil -} - -func (r *PaddleJobReconciler) GetDefaultContainerName() string { - return kubeflowv1.PaddleJobDefaultContainerName -} - -func (r *PaddleJobReconciler) GetDefaultContainerPortName() string { - return kubeflowv1.PaddleJobDefaultPortName -} - -func (r *PaddleJobReconciler) IsMasterRole(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, - rtype kubeflowv1.ReplicaType, index int) bool { - return string(rtype) == string(kubeflowv1.PaddleJobReplicaTypeMaster) -} - -// 
onOwnerCreateFunc modify creation condition. -func (r *PaddleJobReconciler) onOwnerCreateFunc() func(createEvent event.TypedCreateEvent[*kubeflowv1.PaddleJob]) bool { - return func(e event.TypedCreateEvent[*kubeflowv1.PaddleJob]) bool { - paddlejob := e.Object - r.Scheme.Default(paddlejob) - msg := fmt.Sprintf("PaddleJob %s is created.", e.Object.GetName()) - logrus.Info(msg) - trainingoperatorcommon.CreatedJobsCounterInc(paddlejob.Namespace, r.GetFrameworkName()) - commonutil.UpdateJobConditions(&paddlejob.Status, kubeflowv1.JobCreated, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobCreatedReason), msg) - return true - } -} diff --git a/pkg/controller.v1/paddlepaddle/paddlepaddle_controller_suite_test.go b/pkg/controller.v1/paddlepaddle/paddlepaddle_controller_suite_test.go deleted file mode 100644 index 5d3505cb71..0000000000 --- a/pkg/controller.v1/paddlepaddle/paddlepaddle_controller_suite_test.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2022 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package paddle - -import ( - "context" - "crypto/tls" - "fmt" - "net" - "path/filepath" - "testing" - "time" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - paddlewebhook "github.com/kubeflow/training-operator/pkg/webhooks/paddlepaddle" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - "k8s.io/client-go/kubernetes/scheme" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" - //+kubebuilder:scaffold:imports -) - -// These tests use Ginkgo (BDD-style Go testing framework). Refer to -// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. - -var ( - testK8sClient client.Client - testEnv *envtest.Environment - testCtx context.Context - testCancel context.CancelFunc -) - -func TestAPIs(t *testing.T) { - RegisterFailHandler(Fail) - - RunSpecs(t, "Controller Suite") -} - -var _ = BeforeSuite(func() { - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - - testCtx, testCancel = context.WithCancel(context.TODO()) - - By("bootstrapping test environment") - testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "manifests", "base", "crds")}, - ErrorIfCRDPathMissing: true, - WebhookInstallOptions: envtest.WebhookInstallOptions{ - Paths: []string{filepath.Join("..", "..", "..", "manifests", "base", "webhook", "manifests.yaml")}, - }, - } - - cfg, err := testEnv.Start() - Expect(err).NotTo(HaveOccurred()) - Expect(cfg).NotTo(BeNil()) - - err = v1beta1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - err = kubeflowv1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - //+kubebuilder:scaffold:scheme - - testK8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) - Expect(err).NotTo(HaveOccurred()) - Expect(testK8sClient).NotTo(BeNil()) - - mgr, err := ctrl.NewManager(cfg, ctrl.Options{ - Metrics: metricsserver.Options{ - BindAddress: "0", - }, - WebhookServer: webhook.NewServer( - 
webhook.Options{ - Host: testEnv.WebhookInstallOptions.LocalServingHost, - Port: testEnv.WebhookInstallOptions.LocalServingPort, - CertDir: testEnv.WebhookInstallOptions.LocalServingCertDir, - }), - }) - Expect(err).NotTo(HaveOccurred()) - - gangSchedulingSetupFunc := common.GenNonGangSchedulerSetupFunc() - r := NewReconciler(mgr, gangSchedulingSetupFunc) - Expect(r.SetupWithManager(mgr, 1)).NotTo(HaveOccurred()) - Expect(paddlewebhook.SetupWebhook(mgr)).NotTo(HaveOccurred()) - - go func() { - defer GinkgoRecover() - err = mgr.Start(testCtx) - Expect(err).ToNot(HaveOccurred(), "failed to run manager") - }() - - dialer := &net.Dialer{Timeout: time.Second} - addrPort := fmt.Sprintf("%s:%d", testEnv.WebhookInstallOptions.LocalServingHost, testEnv.WebhookInstallOptions.LocalServingPort) - Eventually(func(g Gomega) { - conn, err := tls.DialWithDialer(dialer, "tcp", addrPort, &tls.Config{InsecureSkipVerify: true}) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(conn.Close()).NotTo(HaveOccurred()) - }).Should(Succeed()) -}) - -var _ = AfterSuite(func() { - By("tearing down the test environment") - testCancel() - err := testEnv.Stop() - Expect(err).NotTo(HaveOccurred()) -}) diff --git a/pkg/controller.v1/paddlepaddle/paddlepaddle_controller_test.go b/pkg/controller.v1/paddlepaddle/paddlepaddle_controller_test.go deleted file mode 100644 index 72a851642a..0000000000 --- a/pkg/controller.v1/paddlepaddle/paddlepaddle_controller_test.go +++ /dev/null @@ -1,514 +0,0 @@ -// Copyright 2022 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package paddle - -import ( - "context" - "fmt" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - commonutil "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -var _ = Describe("PaddleJob controller", func() { - // Define utility constants for object names and testing timeouts/durations and intervals. - const ( - expectedPort = int32(8080) - ) - Context("When creating the PaddleJob", func() { - const name = "test-job" - var ( - ctx = context.Background() - ns *corev1.Namespace - job *kubeflowv1.PaddleJob - jobKey types.NamespacedName - masterKey types.NamespacedName - worker0Key types.NamespacedName - ) - BeforeEach(func() { - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "paddle-test-", - }, - } - Expect(testK8sClient.Create(ctx, ns)).Should(Succeed()) - - job = newPaddleJobForTest(name, ns.Name) - jobKey = client.ObjectKeyFromObject(job) - masterKey = types.NamespacedName{ - Name: fmt.Sprintf("%s-master-0", name), - Namespace: ns.Name, - } - worker0Key = types.NamespacedName{ - Name: fmt.Sprintf("%s-worker-0", name), - Namespace: ns.Name, - } - job.Spec.PaddleReplicaSpecs = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.PaddleJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Image: "test-image", - Name: kubeflowv1.PaddleJobDefaultContainerName, - Ports: []corev1.ContainerPort{ - { - Name: kubeflowv1.PaddleJobDefaultPortName, - 
ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP, - }, - }, - }, - }, - }, - }, - }, - kubeflowv1.PaddleJobReplicaTypeWorker: { - Replicas: ptr.To[int32](2), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Image: "test-image", - Name: kubeflowv1.PaddleJobDefaultContainerName, - Ports: []corev1.ContainerPort{ - { - Name: kubeflowv1.PaddleJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP, - }, - }, - }, - }, - }, - }, - }, - } - }) - AfterEach(func() { - Expect(testK8sClient.Delete(ctx, job)).Should(Succeed()) - Expect(testK8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - It("Should get the corresponding resources successfully", func() { - By("By creating a new PaddleJob") - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PaddleJob{} - - // We'll need to retry getting this newly created PaddleJob, given that creation may not immediately happen. - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - masterPod := &corev1.Pod{} - Eventually(func() bool { - err := testK8sClient.Get(ctx, masterKey, masterPod) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - masterSvc := &corev1.Service{} - Eventually(func() bool { - err := testK8sClient.Get(ctx, masterKey, masterSvc) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - // Check the pod port. - Expect(masterPod.Spec.Containers[0].Ports).To(ContainElement(corev1.ContainerPort{ - Name: kubeflowv1.PaddleJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP})) - // Check env variable - Expect(masterPod.Spec.Containers[0].Env).To(ContainElements(corev1.EnvVar{ - Name: EnvMasterEndpoint, - Value: fmt.Sprintf("$(POD_IP_DUMMY):%d", expectedPort), - })) - // Check service port. 
- Expect(masterSvc.Spec.Ports[0].Port).To(Equal(expectedPort)) - // Check owner reference. - trueVal := true - Expect(masterPod.OwnerReferences).To(ContainElement(metav1.OwnerReference{ - APIVersion: kubeflowv1.SchemeGroupVersion.String(), - Kind: kubeflowv1.PaddleJobKind, - Name: name, - UID: created.UID, - Controller: &trueVal, - BlockOwnerDeletion: &trueVal, - })) - Expect(masterSvc.OwnerReferences).To(ContainElement(metav1.OwnerReference{ - APIVersion: kubeflowv1.SchemeGroupVersion.String(), - Kind: kubeflowv1.PaddleJobKind, - Name: name, - UID: created.UID, - Controller: &trueVal, - BlockOwnerDeletion: &trueVal, - })) - - // Test job status. - masterPod.Status.Phase = corev1.PodSucceeded - masterPod.ResourceVersion = "" - Expect(testK8sClient.Status().Update(ctx, masterPod)).Should(Succeed()) - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - if err != nil { - return false - } - return created.Status.ReplicaStatuses != nil && created.Status. - ReplicaStatuses[kubeflowv1.PaddleJobReplicaTypeMaster].Succeeded == 1 - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - // Check if the job is succeeded. 
- cond := getCondition(created.Status, kubeflowv1.JobSucceeded) - Expect(cond.Status).To(Equal(corev1.ConditionTrue)) - }) - It("Shouldn't create resources if PaddleJob is suspended", func() { - By("By creating a new PaddleJob with suspend=true") - job.Spec.RunPolicy.Suspend = ptr.To(true) - job.Spec.PaddleReplicaSpecs[kubeflowv1.PaddleJobReplicaTypeWorker].Replicas = ptr.To[int32](1) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PaddleJob{} - masterPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - masterSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - - By("Checking created PaddleJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - By("Checking created PaddleJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - errMasterPod := testK8sClient.Get(ctx, masterKey, masterPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the PaddleJob has suspended condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.ConsistentDuration, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: 
corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("PaddleJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("PaddleJob %s is suspended.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - }) - - It("Should delete resources after PaddleJob is suspended; Should resume PaddleJob after PaddleJob is unsuspended", func() { - By("By creating a new PaddleJob") - job.Spec.PaddleReplicaSpecs[kubeflowv1.PaddleJobReplicaTypeWorker].Replicas = ptr.To[int32](1) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PaddleJob{} - masterPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - masterSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - - // We'll need to retry getting this newly created PaddleJob, given that creation may not immediately happen. 
- By("Checking created PaddleJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - var startTimeBeforeSuspended *metav1.Time - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - startTimeBeforeSuspended = created.Status.StartTime - return startTimeBeforeSuspended - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Checking the created pods and services") - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errMaster == nil && errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errMaster == nil && errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Updating the pod's phase with Running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, masterKey, masterPod)).Should(Succeed()) - masterPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, masterPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking the PaddleJob's condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: 
commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("PaddleJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("PaddleJob %s is running.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Updating the PaddleJob with suspend=true") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(true) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the pods and services are removed") - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errors.IsNotFound(errMaster) && errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMaster) && errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - errMasterPod := testK8sClient.Get(ctx, masterKey, masterPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the PaddleJob has a suspended condition") - Eventually(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return 
created.Status.ReplicaStatuses[kubeflowv1.PaddleJobReplicaTypeMaster].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.PaddleJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.ReplicaStatuses[kubeflowv1.PaddleJobReplicaTypeMaster].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.PaddleJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - Expect(created.Status.Conditions).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("PaddleJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionFalse, - Reason: commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("PaddleJob %s is suspended.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("PaddleJob %s is suspended.", name), - Status: corev1.ConditionTrue, - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Unsuspending the PaddleJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Check if the pods and 
services are created") - Eventually(func() error { - return testK8sClient.Get(ctx, masterKey, masterPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, masterKey, masterSvc) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerSvc) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - By("Updating Pod's condition with running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, masterKey, masterPod)).Should(Succeed()) - masterPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, masterPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the PaddleJob has resumed conditions") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("PaddleJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobResumedReason), - Message: fmt.Sprintf("PaddleJob %s is resumed.", name), - Status: corev1.ConditionFalse, - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: 
commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("PaddleJob %s is running.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Checking if the startTime is updated") - Expect(created.Status.StartTime).ShouldNot(Equal(startTimeBeforeSuspended)) - }) - - It("Should not reconcile a job while managed by external controller", func() { - By("Creating a PaddleJob managed by external controller") - job.Spec.RunPolicy = kubeflowv1.RunPolicy{ - ManagedBy: ptr.To(kubeflowv1.MultiKueueController), - } - job.Spec.RunPolicy.Suspend = ptr.To(true) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PaddleJob{} - By("Checking created PaddleJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Checking created PaddleJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - masterPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - masterSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - errMasterPod := testK8sClient.Get(ctx, masterKey, masterPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue(), "pods and services should be created by external controller (here not existent)") - - By("Checking if the PaddleJob status was not updated") - Eventually(func() 
[]kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - - By("Unsuspending the PaddleJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking created PaddleJob still has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the PaddleJob status was not updated, even after unsuspending") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - }) - }) -}) - -func newPaddleJobForTest(name, namespace string) *kubeflowv1.PaddleJob { - return &kubeflowv1.PaddleJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - } -} - -// getCondition returns the condition with the provided type. 
-func getCondition(status kubeflowv1.JobStatus, condType kubeflowv1.JobConditionType) *kubeflowv1.JobCondition { - for _, condition := range status.Conditions { - if condition.Type == condType { - return &condition - } - } - return nil -} diff --git a/pkg/controller.v1/pytorch/elastic.go b/pkg/controller.v1/pytorch/elastic.go deleted file mode 100644 index ee0a4f6308..0000000000 --- a/pkg/controller.v1/pytorch/elastic.go +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -package pytorch - -import ( - "fmt" - "strconv" - "sync" - - corev1 "k8s.io/api/core/v1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -const ( - // Rendezvous related arguments - - // EnvRDZVBackend is the environment variable name for the rdzv backend. - EnvRDZVBackend = "PET_RDZV_BACKEND" - // EnvRDZVID is the environment variable name for the rdzv id. - EnvRDZVID = "PET_RDZV_ID" - // ENVRDZVConf is the environment variable name for the rdzv conf. - EnvRDZVConf = "PET_RDZV_CONF" - // EnvRDZVEndpoint is the environment variable name for the rdzv endpoint. - EnvRDZVEndpoint = "PET_RDZV_ENDPOINT" - // EnvRDZVStandalone is the environment variable name for the standalone mode. - EnvStandalone = "PET_STANDALONE" - - // User-code launch related arguments. - - // EnvMaxRestarts is the environment variable name for the maximum number of worker group restarts before failing. 
- EnvMaxRestarts = "PET_MAX_RESTARTS" - // EnvMonitorInterval is the environment variable name for the interval, in seconds, to monitor the state of workers. - EnvMonitorInterval = "PET_MONITOR_INTERVAL" - // EnvStartMethod is the environment variable name for the multiprocessing start method to use when creating workers, which could be fork, spawn and forkserver. - EnvStartMethod = "PET_START_METHOD" - // EnvNProcPerNode is the environment variable name for the number of processes per node. - EnvNProcPerNode = "PET_NPROC_PER_NODE" -) - -var ( - elasticGenerator EnvVarGenerator - onceElastic sync.Once -) - -// ElasticEnvVarGenerator is the environment variable generator for Elastic related arguments. -type ElasticEnvVarGenerator struct{} - -func GetElasticEnvVarGenerator() EnvVarGenerator { - onceElastic.Do(func() { - elasticGenerator = &ElasticEnvVarGenerator{} - }) - return elasticGenerator -} - -func (e ElasticEnvVarGenerator) Generate( - job *kubeflowv1.PyTorchJob) ([]corev1.EnvVar, error) { - var envVars []corev1.EnvVar - - elasticPolicy := job.Spec.ElasticPolicy - if elasticPolicy == nil { - // Return empty env vars. - return nil, nil - } - - // Generate RDZV_ENDPOINT. - if envVar, err := e.generateEnvRDZVEndpoint(job); err != nil { - return nil, err - } else { - envVars = append(envVars, *envVar) - } - // Generate RDZV_BACKEND. - envVars = append(envVars, e.generateEnvBackend(elasticPolicy)) - // Generate NNODES. 
- if envVar, err := e.generateEnvNnodes(job); err != nil { - return nil, err - } else { - envVars = append(envVars, *envVar) - } - - if elasticPolicy.MaxRestarts != nil { - envVars = append(envVars, corev1.EnvVar{ - Name: EnvMaxRestarts, - Value: strconv.Itoa(int(*elasticPolicy.MaxRestarts)), - }) - } - if elasticPolicy.NProcPerNode != nil { - envVars = append(envVars, corev1.EnvVar{ - Name: EnvNProcPerNode, - Value: strconv.Itoa(int(*elasticPolicy.NProcPerNode)), - }) - } - if elasticPolicy.RDZVID != nil { - envVars = append(envVars, corev1.EnvVar{ - Name: EnvRDZVID, - Value: *elasticPolicy.RDZVID, - }) - } - if envVar := e.generateEnvRDZVConf(elasticPolicy); envVar != nil { - envVars = append(envVars, *envVar) - } - if elasticPolicy.Standalone != nil && *elasticPolicy.Standalone { - envVars = append(envVars, corev1.EnvVar{ - Name: EnvStandalone, - Value: "", - }) - } - - return envVars, nil -} - -func (e ElasticEnvVarGenerator) generateEnvNnodes(job *kubeflowv1.PyTorchJob) (*corev1.EnvVar, error) { - // Return worker.replicas if there is no max and min replicas specified. - if job.Spec.ElasticPolicy.MinReplicas == nil && - job.Spec.ElasticPolicy.MaxReplicas == nil { - if job.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeWorker] == nil { - return nil, fmt.Errorf("cannot find the worker spec") - } - return &corev1.EnvVar{ - Name: EnvNnodes, - Value: strconv.Itoa( - int(*job.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeWorker]. 
- Replicas)), - }, nil - } - - return &corev1.EnvVar{ - Name: EnvNnodes, - Value: fmt.Sprintf("%d:%d", - *job.Spec.ElasticPolicy.MinReplicas, *job.Spec.ElasticPolicy.MaxReplicas), - }, nil -} - -func (e ElasticEnvVarGenerator) generateEnvRDZVEndpoint(job *kubeflowv1.PyTorchJob) (*corev1.EnvVar, error) { - var err error - host := "" - if job.Spec.ElasticPolicy.RDZVHost == nil { - host = fmt.Sprintf("%s-worker-0", job.Name) - } else { - host = *job.Spec.ElasticPolicy.RDZVHost - } - - var port int32 - if job.Spec.ElasticPolicy.RDZVPort == nil { - // Generate RDZV_Endpoint. - port, err = getPortFromPyTorchJob(job, kubeflowv1.PyTorchJobReplicaTypeWorker) - if err != nil { - return nil, err - } - } else { - port = *job.Spec.ElasticPolicy.RDZVPort - } - return &corev1.EnvVar{ - Name: EnvRDZVEndpoint, - Value: fmt.Sprintf("%s:%d", host, port), - }, nil -} - -func (e ElasticEnvVarGenerator) generateEnvRDZVConf(elasticPolicy *kubeflowv1.ElasticPolicy) *corev1.EnvVar { - if elasticPolicy.RDZVConf == nil { - return nil - } - val := "" - for _, conf := range elasticPolicy.RDZVConf { - val += fmt.Sprintf("%s=%s,", conf.Key, conf.Value) - } - return &corev1.EnvVar{ - Name: EnvRDZVConf, - // Remove the last comma. 
- Value: val[:len(val)-1], - } -} - -func (e ElasticEnvVarGenerator) generateEnvBackend(elasticPolicy *kubeflowv1.ElasticPolicy) corev1.EnvVar { - if elasticPolicy.RDZVBackend != nil { - return corev1.EnvVar{ - Name: EnvRDZVBackend, - Value: string(*elasticPolicy.RDZVBackend), - } - } - return corev1.EnvVar{ - Name: EnvRDZVBackend, - Value: string(kubeflowv1.BackendC10D), - } -} diff --git a/pkg/controller.v1/pytorch/elastic_test.go b/pkg/controller.v1/pytorch/elastic_test.go deleted file mode 100644 index 390376e316..0000000000 --- a/pkg/controller.v1/pytorch/elastic_test.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -package pytorch - -import ( - "testing" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/utils/ptr" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestElasticGenerate(t *testing.T) { - gomega.RegisterFailHandler(ginkgo.Fail) - defer ginkgo.GinkgoRecover() - - backendC10D := kubeflowv1.BackendC10D - - tests := []struct { - name string - job *kubeflowv1.PyTorchJob - expectedErr error - expected []corev1.EnvVar - }{ - { - name: "Without ElasticPolicy", - job: &kubeflowv1.PyTorchJob{ - Spec: kubeflowv1.PyTorchJobSpec{ - PyTorchReplicaSpecs: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - expectedErr: nil, - expected: nil, - }, - { - name: "With ElasticPolicy", - job: &kubeflowv1.PyTorchJob{ - Spec: kubeflowv1.PyTorchJobSpec{ - ElasticPolicy: &kubeflowv1.ElasticPolicy{ - MinReplicas: ptr.To[int32](1), - MaxReplicas: ptr.To[int32](3), - RDZVBackend: &backendC10D, - RDZVPort: ptr.To[int32](1234), - RDZVHost: ptr.To("localhost"), - RDZVID: ptr.To("rdzv-id"), - RDZVConf: []kubeflowv1.RDZVConf{ - { - Key: "rdzv-conf-name", - Value: "rdzv-conf-value", - }, - { - Key: "rdzv-conf-name-1", - Value: "rdzv-conf-value-1", - }, - }, - MaxRestarts: ptr.To[int32](3), - }, - PyTorchReplicaSpecs: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - expectedErr: nil, - expected: []corev1.EnvVar{ - { - Name: EnvMaxRestarts, - Value: "3", - }, - { - Name: EnvRDZVBackend, - Value: "c10d", - }, - { - Name: EnvRDZVEndpoint, - Value: "localhost:1234", - }, - { - Name: EnvRDZVID, - Value: "rdzv-id", - }, - { - Name: EnvRDZVConf, - Value: 
"rdzv-conf-name=rdzv-conf-value,rdzv-conf-name-1=rdzv-conf-value-1", - }, - { - Name: EnvNnodes, - Value: "1:3", - }, - }, - }, - } - - for _, test := range tests { - actual, err := GetElasticEnvVarGenerator().Generate(test.job) - if test.expectedErr == nil { - gomega.Expect(err).To(gomega.BeNil()) - } else { - gomega.Expect(err).To(gomega.Equal(test.expectedErr)) - } - if test.expected == nil { - gomega.Expect(actual).To(gomega.BeNil()) - } else { - gomega.Expect(actual).To(gomega.ConsistOf(test.expected)) - } - } -} diff --git a/pkg/controller.v1/pytorch/envvar.go b/pkg/controller.v1/pytorch/envvar.go deleted file mode 100644 index 16e3fe270b..0000000000 --- a/pkg/controller.v1/pytorch/envvar.go +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -package pytorch - -import ( - "fmt" - "strconv" - "strings" - - corev1 "k8s.io/api/core/v1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -const ( - // Worker/node size related arguments. - - // EnvNprocPerNode is the environment variable name for the number of processes per node. - EnvNprocPerNode = "PET_NPROC_PER_NODE" - // EnvNnodes is the environment variable name for the number of nodes. - EnvNnodes = "PET_NNODES" - // EnvNodeRank is the environment variable name for the rank of nodes. - EnvNodeRank = "PET_NODE_RANK" -) - -// EnvVarGenerator is the environment variable generator interface. 
-type EnvVarGenerator interface { - Generate(job *kubeflowv1.PyTorchJob) ([]corev1.EnvVar, error) -} - -func setPodEnv(obj interface{}, podTemplateSpec *corev1.PodTemplateSpec, rtype, index string) error { - pytorchjob, ok := obj.(*kubeflowv1.PyTorchJob) - if !ok { - return fmt.Errorf("%+v is not a type of PyTorchJob", obj) - } - - for i := range podTemplateSpec.Spec.Containers { - // Initialize the environment variables. - if len(podTemplateSpec.Spec.Containers[i].Env) == 0 { - podTemplateSpec.Spec.Containers[i].Env = make([]corev1.EnvVar, 0) - } - // Set PYTHONUNBUFFERED to true, to disable output buffering. - // Ref https://stackoverflow.com/questions/59812009/what-is-the-use-of-pythonunbuffered-in-docker-file. - podTemplateSpec.Spec.Containers[i].Env = append( - podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "PYTHONUNBUFFERED", - Value: "1", - }) - - totalReplicas := getTotalReplicas(pytorchjob) - nprocPerNode := getNprocPerNodeInt(pytorchjob) - worldSize := int(totalReplicas) * nprocPerNode - - // If the master is not null, then we need to set the MASTER_ADDR and RANK. - if pytorchjob.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeMaster] != nil { - envVars, err := GetMasterEnvVarGenerator().Generate(pytorchjob) - if err != nil { - return err - } - // Set master related environment variables. - podTemplateSpec.Spec.Containers[i].Env = append( - podTemplateSpec.Spec.Containers[i].Env, envVars...) - - // Set world size and rank. 
- rank, err := strconv.Atoi(index) - if err != nil { - return err - } - if rtype == strings.ToLower(string(kubeflowv1.PyTorchJobReplicaTypeWorker)) { - rank = rank + 1 - } - - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "WORLD_SIZE", - Value: strconv.Itoa(worldSize), - }) - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "RANK", - Value: strconv.Itoa(rank), - }) - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvNodeRank, - Value: strconv.Itoa(rank), - }) - } - - if pytorchjob.Spec.NprocPerNode != nil { - podTemplateSpec.Spec.Containers[i].Env = append(podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvNprocPerNode, - Value: *pytorchjob.Spec.NprocPerNode, - }) - } - - // Set the elastic environment variables if the elasticPolicy is not null. - // nnodes is set in range format in elastic mode, e.g. nnodes=1:4 - // otherwise, nnodes is set by int, e.g. nnodes=2 - if pytorchjob.Spec.ElasticPolicy != nil { - envVars, err := GetElasticEnvVarGenerator().Generate(pytorchjob) - if err != nil { - return err - } - // Set elastic related environment variables. - podTemplateSpec.Spec.Containers[i].Env = append( - podTemplateSpec.Spec.Containers[i].Env, envVars...) - } else { - podTemplateSpec.Spec.Containers[i].Env = append( - podTemplateSpec.Spec.Containers[i].Env, corev1.EnvVar{ - Name: EnvNnodes, - Value: strconv.Itoa(int(totalReplicas)), - }) - } - } - - return nil -} - -// getNprocPerNodeInt return the int value of NprocPerNode, return 1 if not int -// When nproc_per_node set to auto, it means the number of process will be determinated -// in the user process phase, in this case, world size env will not be used. 
-func getNprocPerNodeInt(job *kubeflowv1.PyTorchJob) int { - if job.Spec.NprocPerNode == nil { - return 1 - } - if np, err := strconv.Atoi(*job.Spec.NprocPerNode); err == nil { - return np - } - return 1 -} - -func getTotalReplicas(job *kubeflowv1.PyTorchJob) int32 { - jobReplicas := int32(0) - for _, r := range job.Spec.PyTorchReplicaSpecs { - jobReplicas += *r.Replicas - } - return jobReplicas -} - -func replicaName(jobName string, rtype kubeflowv1.ReplicaType, index int) string { - n := jobName + "-" + strings.ToLower(string(rtype)) + "-" + strconv.Itoa(index) - return strings.Replace(n, "/", "-", -1) -} - -func getPortFromPyTorchJob(job *kubeflowv1.PyTorchJob, rtype kubeflowv1.ReplicaType) (int32, error) { - containers := job.Spec.PyTorchReplicaSpecs[rtype].Template.Spec.Containers - for _, container := range containers { - if container.Name == kubeflowv1.PyTorchJobDefaultContainerName { - ports := container.Ports - for _, port := range ports { - if port.Name == kubeflowv1.PyTorchJobDefaultPortName { - return port.ContainerPort, nil - } - } - } - } - return -1, fmt.Errorf("port not found") -} diff --git a/pkg/controller.v1/pytorch/hpa.go b/pkg/controller.v1/pytorch/hpa.go deleted file mode 100644 index e8846600cc..0000000000 --- a/pkg/controller.v1/pytorch/hpa.go +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -package pytorch - -import ( - "context" - - autoscalingv2 "k8s.io/api/autoscaling/v2" - "k8s.io/apimachinery/pkg/api/equality" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/klog/v2" - controllerruntime "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - trainutil "github.com/kubeflow/training-operator/pkg/util/train" -) - -func (r *PyTorchJobReconciler) ReconcileHPA(pytorchJob *kubeflowv1.PyTorchJob) error { - logger := r.Log.WithValues(kubeflowv1.PyTorchJobSingular, pytorchJob.Name) - - if pytorchJob.Spec.ElasticPolicy == nil || pytorchJob.Spec.ElasticPolicy.Metrics == nil { - logger.V(1).Info( - "No ElasicPolicy or Metric is specified, skipping HPA reconciling process") - return nil - } - - current := &autoscalingv2.HorizontalPodAutoscaler{} - - // Get the expected HPA. - expected, err := desiredHPA(pytorchJob, r.Scheme) - if err != nil { - return err - } - - err = r.Get(context.TODO(), client.ObjectKeyFromObject(expected), current) - if err != nil { - if errors.IsNotFound(err) { - if trainutil.IsJobSuspended(&pytorchJob.Spec.RunPolicy) { - // If the job is suspended, it's correct behavior that HPA doesn't exist. - return nil - } - // Create the new HPA. 
- logger.V(1).Info("Creating HPA", "namespace", expected.Namespace, "name", expected.Name) - return r.Create(context.TODO(), expected) - } - return err - } - if trainutil.IsJobSuspended(&pytorchJob.Spec.RunPolicy) { - // Delete the current HPA - logger.V(1).Info("Deleting HPA", "HorizontalPodAutoscaler", klog.KObj(current)) - return r.Delete(context.TODO(), current) - } - - if !equality.Semantic.DeepEqual(expected.Spec, current.Spec) { - logger.V(1).Info("Updating HPA", "namespace", current.Namespace, "name", current.Name) - expected.ResourceVersion = current.ResourceVersion - err = r.Update(context.TODO(), expected) - if err != nil { - return err - } - } - return nil -} - -func desiredHPA(pytorchJob *kubeflowv1.PyTorchJob, scheme *runtime.Scheme) ( - *autoscalingv2.HorizontalPodAutoscaler, error) { - hpa := &autoscalingv2.HorizontalPodAutoscaler{ - ObjectMeta: metav1.ObjectMeta{ - Name: pytorchJob.Name, - Namespace: pytorchJob.Namespace, - }, - Spec: autoscalingv2.HorizontalPodAutoscalerSpec{ - ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{ - Kind: pytorchJob.Kind, - Name: pytorchJob.Name, - APIVersion: pytorchJob.APIVersion, - }, - MinReplicas: pytorchJob.Spec.ElasticPolicy.MinReplicas, - MaxReplicas: *pytorchJob.Spec.ElasticPolicy.MaxReplicas, - Metrics: pytorchJob.Spec.ElasticPolicy.Metrics, - }, - } - if err := controllerruntime.SetControllerReference(pytorchJob, hpa, scheme); err != nil { - return nil, err - } - return hpa, nil -} diff --git a/pkg/controller.v1/pytorch/initcontainer.go b/pkg/controller.v1/pytorch/initcontainer.go deleted file mode 100644 index cab6b81657..0000000000 --- a/pkg/controller.v1/pytorch/initcontainer.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -package pytorch - -import ( - "bytes" - "fmt" - "html/template" - "os" - "strings" - "sync" - - "github.com/go-logr/logr" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/yaml" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/config" -) - -var ( - initContainerTemplate = ` -- name: init-pytorch - image: {{.InitContainerImage}} - imagePullPolicy: IfNotPresent - resources: - limits: - cpu: 100m - memory: 20Mi - requests: - cpu: 50m - memory: 10Mi - command: ['sh', '-c', 'err=1;for i in $(seq {{.MaxTries}}); do if nslookup {{.MasterAddr}}; then err=0 && break; fi;echo waiting for master; sleep 2; done; exit $err']` - onceInitContainer sync.Once - icGenerator *initContainerGenerator -) - -type initContainerGenerator struct { - template string - image string - maxTries int -} - -func getInitContainerGenerator() *initContainerGenerator { - onceInitContainer.Do(func() { - icGenerator = &initContainerGenerator{ - template: getInitContainerTemplateOrDefault(config.Config.PyTorchInitContainerTemplateFile), - image: config.Config.PyTorchInitContainerImage, - maxTries: config.Config.PyTorchInitContainerMaxTries, - } - }) - return icGenerator -} - -func (i *initContainerGenerator) GetInitContainer(masterAddr string) ([]corev1.Container, error) { - var buf bytes.Buffer - tpl, err := template.New("container").Parse(i.template) - if err != nil { - return nil, err - } - if err := tpl.Execute(&buf, struct { - MasterAddr string - InitContainerImage string - 
MaxTries int - }{ - MasterAddr: masterAddr, - InitContainerImage: i.image, - MaxTries: i.maxTries, - }); err != nil { - return nil, err - } - - var result []corev1.Container - err = yaml.Unmarshal(buf.Bytes(), &result) - if err != nil { - return nil, err - } - - return result, nil -} - -// getInitContainerTemplateOrDefault returns the init container template file if -// it exists, or return initContainerTemplate by default. -func getInitContainerTemplateOrDefault(file string) string { - b, err := os.ReadFile(file) - if err == nil { - return string(b) - } - return initContainerTemplate -} - -func setInitContainer(obj interface{}, podTemplate *corev1.PodTemplateSpec, - rtype, index string, log logr.Logger) error { - pytorchJob, ok := obj.(*kubeflowv1.PyTorchJob) - if !ok { - return fmt.Errorf("%+v is not a type of PyTorchJob", obj) - } - logger := log.WithValues(kubeflowv1.PyTorchJobSingular, types.NamespacedName{ - Namespace: pytorchJob.Namespace, - Name: pytorchJob.Name, - }) - - // There is no need to set init container if no master is specified. - if pytorchJob.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeMaster] == nil { - logger.V(1).Info("No master is specified, skip setting init container") - return nil - } - - // Set the init container only if the master is specified and the current - // rtype is worker. - if rtype == strings.ToLower(string(kubeflowv1.PyTorchJobReplicaTypeWorker)) { - g := getInitContainerGenerator() - initContainers, err := g.GetInitContainer(replicaName(pytorchJob.Name, - kubeflowv1.PyTorchJobReplicaTypeMaster, 0)) - if err != nil { - return err - } - podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, - initContainers...) 
- - } - return nil -} diff --git a/pkg/controller.v1/pytorch/initcontainer_test.go b/pkg/controller.v1/pytorch/initcontainer_test.go deleted file mode 100644 index 732db75708..0000000000 --- a/pkg/controller.v1/pytorch/initcontainer_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License - -package pytorch - -import ( - "strings" - "testing" - - "github.com/go-logr/logr" - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" - "k8s.io/utils/ptr" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/config" -) - -func TestInitContainer(t *testing.T) { - gomega.RegisterFailHandler(ginkgo.Fail) - defer ginkgo.GinkgoRecover() - - config.Config.PyTorchInitContainerImage = config.PyTorchInitContainerImageDefault - config.Config.PyTorchInitContainerTemplateFile = config.PyTorchInitContainerTemplateFileDefault - config.Config.PyTorchInitContainerMaxTries = config.PyTorchInitContainerMaxTriesDefault - - testCases := []struct { - job *kubeflowv1.PyTorchJob - rtype kubeflowv1.ReplicaType - index string - expected int - exepctedErr error - }{ - { - job: &kubeflowv1.PyTorchJob{ - Spec: kubeflowv1.PyTorchJobSpec{ - PyTorchReplicaSpecs: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - rtype: kubeflowv1.PyTorchJobReplicaTypeWorker, - 
index: "0", - expected: 0, - exepctedErr: nil, - }, - { - job: &kubeflowv1.PyTorchJob{ - Spec: kubeflowv1.PyTorchJobSpec{ - PyTorchReplicaSpecs: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - kubeflowv1.PyTorchJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - rtype: kubeflowv1.PyTorchJobReplicaTypeWorker, - index: "0", - expected: 1, - exepctedErr: nil, - }, - { - job: &kubeflowv1.PyTorchJob{ - Spec: kubeflowv1.PyTorchJobSpec{ - PyTorchReplicaSpecs: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - }, - kubeflowv1.PyTorchJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - }, - }, - }, - }, - rtype: kubeflowv1.PyTorchJobReplicaTypeMaster, - index: "0", - expected: 0, - exepctedErr: nil, - }, - } - - for _, t := range testCases { - log := logr.Discard() - podTemplateSpec := t.job.Spec.PyTorchReplicaSpecs[t.rtype].Template - err := setInitContainer(t.job, &podTemplateSpec, - strings.ToLower(string(t.rtype)), t.index, log) - if t.exepctedErr == nil { - gomega.Expect(err).To(gomega.BeNil()) - } else { - gomega.Expect(err).To(gomega.Equal(t.exepctedErr)) - } - gomega.Expect(len(podTemplateSpec.Spec.InitContainers)).To(gomega.Equal(t.expected)) - } -} diff --git a/pkg/controller.v1/pytorch/master.go b/pkg/controller.v1/pytorch/master.go deleted file mode 100644 index c46a031cca..0000000000 --- a/pkg/controller.v1/pytorch/master.go +++ /dev/null @@ -1,62 +0,0 @@ -package pytorch - -import ( - "strconv" - "sync" - - corev1 "k8s.io/api/core/v1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -var ( - masterGenerator EnvVarGenerator - onceMaster sync.Once - EnvMasterPort = "MASTER_PORT" - EnvMasterAddr = "MASTER_ADDR" - - PETMasterPort = "PET_MASTER_PORT" - PETMasterAddr = "PET_MASTER_ADDR" -) - -// MasterEnvVarGenerator is the environment variable 
generator for Master related arguments. -type MasterEnvVarGenerator struct { -} - -func GetMasterEnvVarGenerator() EnvVarGenerator { - onceMaster.Do(func() { - masterGenerator = &MasterEnvVarGenerator{} - }) - return masterGenerator -} - -func (e MasterEnvVarGenerator) Generate( - job *kubeflowv1.PyTorchJob) ([]corev1.EnvVar, error) { - var envVars []corev1.EnvVar - if job.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeMaster] != nil { - masterPort, err := getPortFromPyTorchJob(job, kubeflowv1.PyTorchJobReplicaTypeMaster) - if err != nil { - return nil, err - } - - masterAddr := replicaName(job.Name, kubeflowv1.PyTorchJobReplicaTypeMaster, 0) - - envVars = append(envVars, corev1.EnvVar{ - Name: EnvMasterPort, - Value: strconv.Itoa(int(masterPort)), - }) - envVars = append(envVars, corev1.EnvVar{ - Name: PETMasterPort, - Value: strconv.Itoa(int(masterPort)), - }) - envVars = append(envVars, corev1.EnvVar{ - Name: EnvMasterAddr, - Value: masterAddr, - }) - envVars = append(envVars, corev1.EnvVar{ - Name: PETMasterAddr, - Value: masterAddr, - }) - } - return envVars, nil -} diff --git a/pkg/controller.v1/pytorch/pytorchjob_controller.go b/pkg/controller.v1/pytorch/pytorchjob_controller.go deleted file mode 100644 index 764fb696a5..0000000000 --- a/pkg/controller.v1/pytorch/pytorchjob_controller.go +++ /dev/null @@ -1,527 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package pytorch - -import ( - "context" - "fmt" - "strings" - "time" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" - "github.com/kubeflow/training-operator/pkg/common/util" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - commonutil "github.com/kubeflow/training-operator/pkg/util" - - "github.com/go-logr/logr" - "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/client-go/informers" - kubeclientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/record" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/source" - schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" -) - -const ( - controllerName = "pytorchjob-controller" -) - -// NewReconciler creates a PyTorchJob Reconciler -func NewReconciler(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc) *PyTorchJobReconciler { - r := &PyTorchJobReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - recorder: 
mgr.GetEventRecorderFor(controllerName), - apiReader: mgr.GetAPIReader(), - Log: log.Log, - } - - // Create clients - cfg := mgr.GetConfig() - kubeClientSet := kubeclientset.NewForConfigOrDie(cfg) - sharedInformers := informers.NewSharedInformerFactory(kubeClientSet, 0) - priorityClassInformer := sharedInformers.Scheduling().V1().PriorityClasses() - - // Initialize common job controller - r.JobController = common.JobController{ - Controller: r, - Expectations: expectation.NewControllerExpectations(), - WorkQueue: &util.FakeWorkQueue[string]{}, - Recorder: r.recorder, - KubeClientSet: kubeClientSet, - PriorityClassLister: priorityClassInformer.Lister(), - PriorityClassInformerSynced: priorityClassInformer.Informer().HasSynced, - PodControl: control.RealPodControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - ServiceControl: control.RealServiceControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - } - - gangSchedulingSetupFunc(&r.JobController) - - return r -} - -// PyTorchJobReconciler reconciles a PyTorchJob object -type PyTorchJobReconciler struct { - common.JobController - client.Client - Scheme *runtime.Scheme - Log logr.Logger - recorder record.EventRecorder - apiReader client.Reader -} - -// +kubebuilder:rbac:groups=kubeflow.org,resources=pytorchjobs,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=kubeflow.org,resources=pytorchjobs/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=kubeflow.org,resources=pytorchjobs/finalizers,verbs=update -// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;delete -// +kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=scheduling.volcano.sh,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// 
+kubebuilder:rbac:groups=scheduling.x-k8s.io,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch;delete - -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// the PyTorchJob object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.8.3/pkg/reconcile -func (r *PyTorchJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - logger := r.Log.WithValues(kubeflowv1.PyTorchJobSingular, req.NamespacedName) - - pytorchjob := &kubeflowv1.PyTorchJob{} - err := r.Get(ctx, req.NamespacedName, pytorchjob) - if err != nil { - logger.Info(err.Error(), "unable to fetch PyTorchJob", req.NamespacedName.String()) - return ctrl.Result{}, client.IgnoreNotFound(err) - } - - if manager := r.ManagedByExternalController(pytorchjob.Spec.RunPolicy.ManagedBy); manager != nil { - logger.Info("Skipping PyTorchJob managed by a custom controller", "managed-by", manager) - return ctrl.Result{}, nil - } - - // Check if reconciliation is needed - jobKey, err := common.KeyFunc(pytorchjob) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get jobKey for job object %#v: %v", pytorchjob, err)) - } - - replicaTypes := util.GetReplicaTypes(pytorchjob.Spec.PyTorchReplicaSpecs) - needReconcile := util.SatisfiedExpectations(r.Expectations, jobKey, replicaTypes) - - if !needReconcile || pytorchjob.GetDeletionTimestamp() != nil { - logger.Info("reconcile cancelled, job does not need to do reconcile or has been deleted", - "sync", needReconcile, "deleted", pytorchjob.GetDeletionTimestamp() != nil) - return ctrl.Result{}, nil - } - - 
// Set default priorities to pytorch job - r.Scheme.Default(pytorchjob) - - err = r.ReconcileHPA(pytorchjob) - if err != nil { - logger.Error(err, "Reconcile PyTorchJob HPA error") - return ctrl.Result{}, err - } - // Use common to reconcile the job related pod and service - err = r.ReconcileJobs(pytorchjob, pytorchjob.Spec.PyTorchReplicaSpecs, pytorchjob.Status, &pytorchjob.Spec.RunPolicy) - if err != nil { - logger.Error(err, "Reconcile PyTorchJob error") - return ctrl.Result{}, err - } - t, err := util.DurationUntilExpireTime(&pytorchjob.Spec.RunPolicy, pytorchjob.Status) - if err != nil { - logrus.Warnf("Reconcile PyTorchJob error %v", err) - return ctrl.Result{}, err - } - if t >= 0 { - return ctrl.Result{Requeue: true, RequeueAfter: t}, nil - } - - return ctrl.Result{}, nil -} - -// SetupWithManager sets up the controller with the Manager. -func (r *PyTorchJobReconciler) SetupWithManager(mgr ctrl.Manager, controllerThreads int) error { - c, err := controller.New(r.ControllerName(), mgr, controller.Options{ - Reconciler: r, - MaxConcurrentReconciles: controllerThreads, - }) - if err != nil { - return err - } - // using onOwnerCreateFunc is easier to set defaults - if err = c.Watch(source.Kind[*kubeflowv1.PyTorchJob](mgr.GetCache(), &kubeflowv1.PyTorchJob{}, - &handler.TypedEnqueueRequestForObject[*kubeflowv1.PyTorchJob]{}, - predicate.TypedFuncs[*kubeflowv1.PyTorchJob]{CreateFunc: r.onOwnerCreateFunc()}), - ); err != nil { - return err - } - // inject watching for job related pod - if err = c.Watch(source.Kind[*corev1.Pod](mgr.GetCache(), &corev1.Pod{}, - handler.TypedEnqueueRequestForOwner[*corev1.Pod](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.PyTorchJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Pod](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // inject watching for job related service - if err = c.Watch(source.Kind[*corev1.Service](mgr.GetCache(), &corev1.Service{}, - 
handler.TypedEnqueueRequestForOwner[*corev1.Service](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.PyTorchJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Service](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // skip watching volcano PodGroup if volcano PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: v1beta1.GroupName, Kind: "PodGroup"}, - v1beta1.SchemeGroupVersion.Version); err == nil { - // inject watching for job related volcano PodGroup - if err = c.Watch(source.Kind[*v1beta1.PodGroup](mgr.GetCache(), &v1beta1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*v1beta1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.PyTorchJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*v1beta1.PodGroup](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - } - // skip watching scheduler-plugins PodGroup if scheduler-plugins PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: schedulerpluginsv1alpha1.SchemeGroupVersion.Group, Kind: "PodGroup"}, - schedulerpluginsv1alpha1.SchemeGroupVersion.Version); err == nil { - // inject watching for job related scheduler-plugins PodGroup - if err = c.Watch(source.Kind[*schedulerpluginsv1alpha1.PodGroup](mgr.GetCache(), &schedulerpluginsv1alpha1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*schedulerpluginsv1alpha1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.PyTorchJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*schedulerpluginsv1alpha1.PodGroup](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - } - return nil -} - -func (r *PyTorchJobReconciler) ControllerName() string { - return controllerName -} - -func (r *PyTorchJobReconciler) GetAPIGroupVersionKind() schema.GroupVersionKind { - return kubeflowv1.GroupVersion.WithKind(kubeflowv1.PyTorchJobKind) -} - -func (r 
*PyTorchJobReconciler) GetAPIGroupVersion() schema.GroupVersion { - return kubeflowv1.GroupVersion -} - -func (r *PyTorchJobReconciler) GetGroupNameLabelValue() string { - return kubeflowv1.GroupVersion.Group -} - -func (r *PyTorchJobReconciler) GetFrameworkName() string { - return kubeflowv1.PyTorchJobFrameworkName -} - -func (r *PyTorchJobReconciler) GetJobFromInformerCache(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.PyTorchJob{} - err := r.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if errors.IsNotFound(err) { - logrus.Error(err, "pytorch job not found", "namespace", namespace, "name", name) - } else { - logrus.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -func (r *PyTorchJobReconciler) GetJobFromAPIClient(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.PyTorchJob{} - - err := r.apiReader.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if errors.IsNotFound(err) { - logrus.Error(err, "pytorch job not found", "namespace", namespace, "name", name) - } else { - logrus.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -func (r *PyTorchJobReconciler) GetPodsForJob(obj interface{}) ([]*corev1.Pod, error) { - job, err := meta.Accessor(obj) - if err != nil { - return nil, err - } - - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. 
- podlist := &corev1.PodList{} - err = r.List(context.Background(), podlist, client.MatchingLabels(r.GenLabels(job.GetName())), client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - return util.JobControlledPodList(podlist.Items, job), nil -} - -func (r *PyTorchJobReconciler) GetServicesForJob(obj interface{}) ([]*corev1.Service, error) { - job, err := meta.Accessor(obj) - if err != nil { - return nil, err - } - - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. - serviceList := &corev1.ServiceList{} - err = r.List(context.Background(), serviceList, client.MatchingLabels(r.GenLabels(job.GetName())), client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - ret := util.ConvertServiceList(serviceList.Items) - return ret, nil -} - -func (r *PyTorchJobReconciler) DeleteJob(job interface{}) error { - pytorchjob, ok := job.(*kubeflowv1.PyTorchJob) - if !ok { - return fmt.Errorf("%+v is not a type of PyTorchJob", job) - } - if err := r.Delete(context.Background(), pytorchjob); err != nil { - r.recorder.Eventf(pytorchjob, corev1.EventTypeWarning, control.FailedDeletePodReason, "Error deleting: %v", err) - logrus.Error(err, "failed to delete job", "namespace", pytorchjob.Namespace, "name", pytorchjob.Name) - return err - } - r.recorder.Eventf(pytorchjob, corev1.EventTypeNormal, control.SuccessfulDeletePodReason, "Deleted job: %v", pytorchjob.Name) - logrus.Info("job deleted", "namespace", pytorchjob.Namespace, "name", pytorchjob.Name) - trainingoperatorcommon.DeletedJobsCounterInc(pytorchjob.Namespace, r.GetFrameworkName()) - return nil -} - -func (r *PyTorchJobReconciler) GenLabelSelector(jobName string, - rtype kubeflowv1.ReplicaType) *metav1.LabelSelector { - labels := r.GenLabels(jobName) - labels[kubeflowv1.ReplicaTypeLabel] = strings.ToLower(string(rtype)) - - return &metav1.LabelSelector{ - MatchLabels: labels, - } -} - -// 
UpdateJobStatus updates the job status and job conditions -func (r *PyTorchJobReconciler) UpdateJobStatus(job interface{}, - replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, - jobStatus *kubeflowv1.JobStatus) error { - pytorchjob, ok := job.(*kubeflowv1.PyTorchJob) - if !ok { - return fmt.Errorf("%+v is not a type of PyTorchJob", job) - } - pytorchjobKey, err := common.KeyFunc(pytorchjob) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get key for pytorchjob object %#v: %v", pytorchjob, err)) - return err - } - - logger := commonutil.LoggerForJob(pytorchjob) - - // Set StartTime. - if jobStatus.StartTime == nil { - now := metav1.Now() - jobStatus.StartTime = &now - // enqueue a sync to check if job past ActiveDeadlineSeconds - if pytorchjob.Spec.RunPolicy.ActiveDeadlineSeconds != nil { - logger.Infof("Job with ActiveDeadlineSeconds will sync after %d seconds", *pytorchjob.Spec.RunPolicy.ActiveDeadlineSeconds) - r.WorkQueue.AddAfter(pytorchjobKey, time.Duration(*pytorchjob.Spec.RunPolicy.ActiveDeadlineSeconds)*time.Second) - } - } - - for rtype, spec := range replicas { - status := jobStatus.ReplicaStatuses[rtype] - // Generate the label selector. 
- status.Selector = metav1.FormatLabelSelector(r.GenLabelSelector(pytorchjob.Name, rtype)) - - succeeded := status.Succeeded - expected := *(spec.Replicas) - succeeded - running := status.Active - failed := status.Failed - specReplicas := *spec.Replicas - - logrus.Infof("PyTorchJob=%s, ReplicaType=%s expected=%d, running=%d, succeeded=%d, failed=%d, Replicas=%d", - pytorchjob.Name, rtype, expected, running, succeeded, failed, specReplicas) - - if ContainsMasterSpec(replicas) { - if rtype == kubeflowv1.PyTorchJobReplicaTypeMaster { - if running > 0 { - msg := fmt.Sprintf("PyTorchJob %s is running.", pytorchjob.Name) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobRunningReason), msg) - } - // when master is succeed, the job is finished. - if expected == 0 { - msg := fmt.Sprintf("PyTorchJob %s is successfully completed.", pytorchjob.Name) - logrus.Info(msg) - r.Recorder.Event(pytorchjob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobSucceededReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobSucceededReason), msg) - trainingoperatorcommon.SuccessfulJobsCounterInc(pytorchjob.Namespace, r.GetFrameworkName()) - return nil - } - } - } else { - if rtype == kubeflowv1.PyTorchJobReplicaTypeWorker { - // TODO(gaocegege): Support SuccessPolicy - // Leave a succeeded condition for the following two cases: - // 1. If all workers are succeeded. - // 2. If `ElasticPolicy` is not nil and any worker has completed. 
- if expected == 0 || (pytorchjob.Spec.ElasticPolicy != nil && succeeded > 0) { - msg := fmt.Sprintf("PyTorchJob %s/%s successfully completed.", - pytorchjob.Namespace, pytorchjob.Name) - r.recorder.Event(pytorchjob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobSucceededReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobSucceededReason), msg) - trainingoperatorcommon.SuccessfulJobsCounterInc(pytorchjob.Namespace, r.GetFrameworkName()) - } else if running > 0 { - // Some workers are still running, leave a running condition. - msg := fmt.Sprintf("PyTorchJob %s/%s is running.", - pytorchjob.Namespace, pytorchjob.Name) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobRunningReason), msg) - } - } - } - - if failed > 0 && (specReplicas > succeeded+running) { - if spec.RestartPolicy != kubeflowv1.RestartPolicyNever { - msg := fmt.Sprintf("PyTorchJob %s is restarting because %d %s replica(s) failed.", pytorchjob.Name, failed, rtype) - r.Recorder.Event(pytorchjob, corev1.EventTypeWarning, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobRestartingReason), msg) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRestarting, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobRestartingReason), msg) - trainingoperatorcommon.RestartedJobsCounterInc(pytorchjob.Namespace, r.GetFrameworkName()) - } else { - msg := fmt.Sprintf("PyTorchJob %s is failed because %d %s replica(s) failed.", pytorchjob.Name, failed, rtype) - r.Recorder.Event(pytorchjob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobFailedReason), msg) - if 
jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobFailed, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobFailedReason), msg) - trainingoperatorcommon.FailedJobsCounterInc(pytorchjob.Namespace, r.GetFrameworkName()) - } - } - } - return nil -} - -// ContainsMasterSpec returns true if the pytorchjob contains master spec. -func ContainsMasterSpec(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec) bool { - if _, ok := replicas[kubeflowv1.PyTorchJobReplicaTypeMaster]; ok { - return true - } - return false -} - -// UpdateJobStatusInApiServer updates the job status in to cluster. -func (r *PyTorchJobReconciler) UpdateJobStatusInApiServer(job interface{}, jobStatus *kubeflowv1.JobStatus) error { - if jobStatus.ReplicaStatuses == nil { - jobStatus.ReplicaStatuses = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus{} - } - - pytorchjob, ok := job.(*kubeflowv1.PyTorchJob) - trainingoperatorcommon.ClearGeneratedFields(&pytorchjob.ObjectMeta) - if !ok { - return fmt.Errorf("%+v is not a type of PyTorchJob", job) - } - - // Job status passed in differs with status in job, update in basis of the passed in one. 
- if !equality.Semantic.DeepEqual(&pytorchjob.Status, jobStatus) { - pytorchjob = pytorchjob.DeepCopy() - pytorchjob.Status = *jobStatus.DeepCopy() - } - - result := r.Status().Update(context.Background(), pytorchjob) - - if result != nil { - r.Log.WithValues("pytorchjob", types.NamespacedName{ - Namespace: pytorchjob.GetNamespace(), - Name: pytorchjob.GetName(), - }) - return result - } - - return nil -} - -// SetClusterSpec sets the cluster spec and init container for the pod -func (r *PyTorchJobReconciler) SetClusterSpec(job interface{}, podTemplate *corev1.PodTemplateSpec, rtype, index string) error { - if err := setPodEnv(job, podTemplate, rtype, index); err != nil { - return err - } - if err := setInitContainer(job, podTemplate, rtype, index, r.Log); err != nil { - return err - } - return nil -} - -func (r *PyTorchJobReconciler) IsMasterRole(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, - rtype kubeflowv1.ReplicaType, index int) bool { - if _, ok := replicas[kubeflowv1.PyTorchJobReplicaTypeMaster]; ok { - return rtype == kubeflowv1.PyTorchJobReplicaTypeMaster - } - // else check if it is worker with index 0 - return rtype == kubeflowv1.PyTorchJobReplicaTypeWorker && index == 0 -} - -func (r *PyTorchJobReconciler) GetDefaultContainerName() string { - return kubeflowv1.PyTorchJobDefaultContainerName -} - -func (r *PyTorchJobReconciler) GetDefaultContainerPortName() string { - return kubeflowv1.PyTorchJobDefaultPortName -} - -// onOwnerCreateFunc modify creation condition. 
-func (r *PyTorchJobReconciler) onOwnerCreateFunc() func(createEvent event.TypedCreateEvent[*kubeflowv1.PyTorchJob]) bool { - return func(e event.TypedCreateEvent[*kubeflowv1.PyTorchJob]) bool { - pytorchjob := e.Object - r.Scheme.Default(pytorchjob) - msg := fmt.Sprintf("PyTorchJob %s is created.", e.Object.GetName()) - logrus.Info(msg) - trainingoperatorcommon.CreatedJobsCounterInc(pytorchjob.Namespace, r.GetFrameworkName()) - commonutil.UpdateJobConditions(&pytorchjob.Status, kubeflowv1.JobCreated, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobCreatedReason), msg) - return true - } -} diff --git a/pkg/controller.v1/pytorch/pytorchjob_controller_suite_test.go b/pkg/controller.v1/pytorch/pytorchjob_controller_suite_test.go deleted file mode 100644 index 35810c9d1c..0000000000 --- a/pkg/controller.v1/pytorch/pytorchjob_controller_suite_test.go +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pytorch - -import ( - "context" - "crypto/tls" - "fmt" - "net" - "path/filepath" - "testing" - "time" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/config" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - pytorchwebhook "github.com/kubeflow/training-operator/pkg/webhooks/pytorch" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - "k8s.io/client-go/kubernetes/scheme" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" - //+kubebuilder:scaffold:imports -) - -// These tests use Ginkgo (BDD-style Go testing framework). Refer to -// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. - -var ( - testK8sClient client.Client - testEnv *envtest.Environment - testCtx context.Context - testCancel context.CancelFunc -) - -func TestAPIs(t *testing.T) { - RegisterFailHandler(Fail) - - RunSpecs(t, "Controller Suite") -} - -var _ = BeforeSuite(func() { - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - - testCtx, testCancel = context.WithCancel(context.TODO()) - - By("bootstrapping test environment") - testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "manifests", "base", "crds")}, - ErrorIfCRDPathMissing: true, - WebhookInstallOptions: envtest.WebhookInstallOptions{ - Paths: []string{filepath.Join("..", "..", "..", "manifests", "base", "webhook", "manifests.yaml")}, - }, - } - - cfg, err := testEnv.Start() - Expect(err).NotTo(HaveOccurred()) - Expect(cfg).NotTo(BeNil()) - - err = v1beta1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - err = kubeflowv1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - // Set default config. 
- config.Config.PyTorchInitContainerImage = config.PyTorchInitContainerImageDefault - config.Config.PyTorchInitContainerTemplateFile = config.PyTorchInitContainerTemplateFileDefault - - //+kubebuilder:scaffold:scheme - - testK8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) - Expect(err).NotTo(HaveOccurred()) - Expect(testK8sClient).NotTo(BeNil()) - - mgr, err := ctrl.NewManager(cfg, ctrl.Options{ - Metrics: metricsserver.Options{ - BindAddress: "0", - }, - WebhookServer: webhook.NewServer( - webhook.Options{ - Host: testEnv.WebhookInstallOptions.LocalServingHost, - Port: testEnv.WebhookInstallOptions.LocalServingPort, - CertDir: testEnv.WebhookInstallOptions.LocalServingCertDir, - }), - }) - Expect(err).NotTo(HaveOccurred()) - - gangSchedulingSetupFunc := common.GenNonGangSchedulerSetupFunc() - r := NewReconciler(mgr, gangSchedulingSetupFunc) - - Expect(r.SetupWithManager(mgr, 1)).NotTo(HaveOccurred()) - Expect(pytorchwebhook.SetupWebhook(mgr)).NotTo(HaveOccurred()) - - go func() { - defer GinkgoRecover() - err = mgr.Start(testCtx) - Expect(err).ToNot(HaveOccurred(), "failed to run manager") - }() - - dialer := &net.Dialer{Timeout: time.Second} - addrPort := fmt.Sprintf("%s:%d", testEnv.WebhookInstallOptions.LocalServingHost, testEnv.WebhookInstallOptions.LocalServingPort) - Eventually(func(g Gomega) { - conn, err := tls.DialWithDialer(dialer, "tcp", addrPort, &tls.Config{InsecureSkipVerify: true}) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(conn.Close()).NotTo(HaveOccurred()) - }).Should(Succeed()) -}) - -var _ = AfterSuite(func() { - By("tearing down the test environment") - testCancel() - err := testEnv.Stop() - Expect(err).NotTo(HaveOccurred()) -}) diff --git a/pkg/controller.v1/pytorch/pytorchjob_controller_test.go b/pkg/controller.v1/pytorch/pytorchjob_controller_test.go deleted file mode 100644 index 37b4c9218c..0000000000 --- a/pkg/controller.v1/pytorch/pytorchjob_controller_test.go +++ /dev/null @@ -1,741 +0,0 @@ -// Copyright 
2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pytorch - -import ( - "context" - "fmt" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - autoscalingv2 "k8s.io/api/autoscaling/v2" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - commonutil "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -var _ = Describe("PyTorchJob controller", func() { - // Define utility constants for object names. 
- const ( - expectedPort = int32(8080) - ) - - Context("When creating the PyTorchJob", func() { - const name = "test-job" - var ( - ns *corev1.Namespace - job *kubeflowv1.PyTorchJob - jobKey types.NamespacedName - masterKey types.NamespacedName - worker0Key types.NamespacedName - ctx = context.Background() - ) - BeforeEach(func() { - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "pytorch-test-", - }, - } - Expect(testK8sClient.Create(ctx, ns)).Should(Succeed()) - - job = newPyTorchJobForTest(name, ns.Name) - jobKey = client.ObjectKeyFromObject(job) - masterKey = types.NamespacedName{ - Name: fmt.Sprintf("%s-master-0", name), - Namespace: ns.Name, - } - worker0Key = types.NamespacedName{ - Name: fmt.Sprintf("%s-worker-0", name), - Namespace: ns.Name, - } - job.Spec.NprocPerNode = nil - job.Spec.PyTorchReplicaSpecs = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.PyTorchJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Image: "test-image", - Name: kubeflowv1.PyTorchJobDefaultContainerName, - Ports: []corev1.ContainerPort{ - { - Name: kubeflowv1.PyTorchJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP, - }, - }, - }, - }, - }, - }, - }, - kubeflowv1.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](2), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Image: "test-image", - Name: kubeflowv1.PyTorchJobDefaultContainerName, - Ports: []corev1.ContainerPort{ - { - Name: kubeflowv1.PyTorchJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP, - }, - }, - }, - }, - }, - }, - }, - } - }) - AfterEach(func() { - Expect(testK8sClient.Delete(ctx, job)).Should(Succeed()) - Expect(testK8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - It("Should get the corresponding resources successfully", func() { - By("By creating 
a new PyTorchJob") - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PyTorchJob{} - - // We'll need to retry getting this newly created PyTorchJob, given that creation may not immediately happen. - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - masterPod := &corev1.Pod{} - Eventually(func() bool { - err := testK8sClient.Get(ctx, masterKey, masterPod) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - masterSvc := &corev1.Service{} - Eventually(func() bool { - err := testK8sClient.Get(ctx, masterKey, masterSvc) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - // Check the pod port. - Expect(masterPod.Spec.Containers[0].Ports).To(ContainElement(corev1.ContainerPort{ - Name: kubeflowv1.PyTorchJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP})) - // Check env variable - Expect(masterPod.Spec.Containers[0].Env).To(ContainElements(corev1.EnvVar{ - Name: EnvMasterPort, - Value: fmt.Sprintf("%d", masterSvc.Spec.Ports[0].Port), - }, corev1.EnvVar{ - Name: EnvMasterAddr, - Value: masterSvc.Name, - }, corev1.EnvVar{ - Name: EnvNprocPerNode, - Value: kubeflowv1.DefaultNprocPerNode, - })) - // Check service port. - Expect(masterSvc.Spec.Ports[0].Port).To(Equal(expectedPort)) - // Check owner reference. 
- trueVal := true - Expect(masterPod.OwnerReferences).To(ContainElement(metav1.OwnerReference{ - APIVersion: kubeflowv1.SchemeGroupVersion.String(), - Kind: kubeflowv1.PyTorchJobKind, - Name: name, - UID: created.UID, - Controller: &trueVal, - BlockOwnerDeletion: &trueVal, - })) - Expect(masterSvc.OwnerReferences).To(ContainElement(metav1.OwnerReference{ - APIVersion: kubeflowv1.SchemeGroupVersion.String(), - Kind: kubeflowv1.PyTorchJobKind, - Name: name, - UID: created.UID, - Controller: &trueVal, - BlockOwnerDeletion: &trueVal, - })) - - // Test job status. - Eventually(func() error { - Expect(testK8sClient.Get(ctx, masterKey, masterPod)).Should(Succeed()) - masterPod.Status.Phase = corev1.PodSucceeded - return testK8sClient.Status().Update(ctx, masterPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - if err != nil { - return false - } - return created.Status.ReplicaStatuses != nil && created.Status. - ReplicaStatuses[kubeflowv1.PyTorchJobReplicaTypeMaster].Succeeded == 1 - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - // Check if the job is succeeded. 
- cond := getCondition(created.Status, kubeflowv1.JobSucceeded) - Expect(cond.Status).To(Equal(corev1.ConditionTrue)) - }) - It("Shouldn't be updated resources if spec.runPolicy.schedulingPolicy.queue is changed after the job is created", func() { - By("Creating a PyTorchJob with a specific queue") - job.Spec.RunPolicy.SchedulingPolicy = &kubeflowv1.SchedulingPolicy{} - job.Spec.RunPolicy.SchedulingPolicy.Queue = "initial-queue" - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - By("Attempting to update the PyTorchJob with a different queue value") - Eventually(func(g Gomega) { - updatedJob := &kubeflowv1.PyTorchJob{} - g.Expect(testK8sClient.Get(ctx, jobKey, updatedJob)).Should(Succeed(), "Failed to get PyTorchJob") - updatedJob.Spec.RunPolicy.SchedulingPolicy.Queue = "test" - err := testK8sClient.Update(ctx, updatedJob) - g.Expect(err).To(HaveOccurred(), "Expected an error when updating the queue, but update succeeded") - By("Checking that the queue update fails") - Expect(err).To(MatchError(ContainSubstring("spec.runPolicy.schedulingPolicy.queue is immutable"), "The error message did not contain the expected message")) - - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Validating the queue was not updated") - freshJob := &kubeflowv1.PyTorchJob{} - Expect(testK8sClient.Get(ctx, client.ObjectKeyFromObject(job), freshJob)).Should(Succeed(), "Failed to get PyTorchJob after update attempt") - Expect(freshJob.Spec.RunPolicy.SchedulingPolicy.Queue).To(Equal("initial-queue"), "The queue should remain as the initial value since it should be immutable") - }) - - It("Shouldn't create resources if PyTorchJob is suspended", func() { - By("By creating a new PyTorchJob with suspend=true") - job.Spec.RunPolicy.Suspend = ptr.To(true) - job.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeWorker].Replicas = ptr.To[int32](1) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PyTorchJob{} - masterPod := 
&corev1.Pod{} - workerPod := &corev1.Pod{} - masterSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - - By("Checking created PyTorchJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - By("Checking created PyTorchJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - errMasterPod := testK8sClient.Get(ctx, masterKey, masterPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the PyTorchJob has suspended condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.ConsistentDuration, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("PyTorchJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("PyTorchJob %s is suspended.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - }) - - It("Should delete resources after PyTorchJob is suspended; Should 
resume PyTorchJob after PyTorchJob is unsuspended", func() { - By("By creating a new PyTorchJob") - job.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeWorker].Replicas = ptr.To[int32](1) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PyTorchJob{} - masterPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - masterSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - - // We'll need to retry getting this newly created PyTorchJob, given that creation may not immediately happen. - By("Checking created PyTorchJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - var startTimeBeforeSuspended *metav1.Time - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - startTimeBeforeSuspended = created.Status.StartTime - return startTimeBeforeSuspended - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Checking the created pods and services") - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errMaster == nil && errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errMaster == nil && errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Updating the pod's phase with Running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, masterKey, masterPod)).Should(Succeed()) - masterPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, masterPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - 
workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking the PyTorchJob's condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("PyTorchJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("PyTorchJob %s is running.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Updating the PyTorchJob with suspend=true") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(true) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the pods and services are removed") - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errors.IsNotFound(errMaster) && errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMaster) && errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - errMasterPod := testK8sClient.Get(ctx, masterKey, masterPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - 
errMasterSvc := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the PyTorchJob has a suspended condition") - Eventually(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.ReplicaStatuses[kubeflowv1.PyTorchJobReplicaTypeMaster].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.PyTorchJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.ReplicaStatuses[kubeflowv1.PyTorchJobReplicaTypeMaster].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.PyTorchJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - Expect(created.Status.Conditions).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("PyTorchJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionFalse, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("PyTorchJob %s is suspended.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("PyTorchJob %s is suspended.", name), - Status: corev1.ConditionTrue, - }, - }, 
testutil.IgnoreJobConditionsTimes)) - - By("Unsuspending the PyTorchJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Check if the pods and services are created") - Eventually(func() error { - return testK8sClient.Get(ctx, masterKey, masterPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, masterKey, masterSvc) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerSvc) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - By("Updating Pod's condition with running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, masterKey, masterPod)).Should(Succeed()) - masterPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, masterPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the PyTorchJob has resumed conditions") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, 
testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("PyTorchJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobResumedReason), - Message: fmt.Sprintf("PyTorchJob %s is resumed.", name), - Status: corev1.ConditionFalse, - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.PyTorchJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("PyTorchJob %s is running.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Checking if the startTime is updated") - Expect(created.Status.StartTime).ShouldNot(Equal(startTimeBeforeSuspended)) - }) - - It("Should not reconcile a job while managed by external controller", func() { - By("Creating a PyTorchJob managed by external controller") - job.Spec.RunPolicy = kubeflowv1.RunPolicy{ - ManagedBy: ptr.To(kubeflowv1.MultiKueueController), - } - job.Spec.RunPolicy.Suspend = ptr.To(true) - job.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeWorker].Replicas = ptr.To[int32](1) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PyTorchJob{} - By("Checking created PyTorchJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Checking created PyTorchJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - masterPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - 
masterSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - errMasterPod := testK8sClient.Get(ctx, masterKey, masterPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue(), "pods and services should be created by external controller (here not existent)") - - By("Checking if the PyTorchJob status was not updated") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - - By("Unsuspending the PyTorchJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking created PyTorchJob still has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the PyTorchJob status was not updated, even after unsuspending") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - - }) - }) - - Context("When creating the elastic PyTorchJob", func() { - const name = "elastic-job" - var ( - ctx = context.Background() - ns *corev1.Namespace - job 
*kubeflowv1.PyTorchJob - jobKey types.NamespacedName - workerKey types.NamespacedName - backendC10D = kubeflowv1.BackendC10D - minReplicas = int32(1) - maxReplicas = int32(3) - maxRestarts = int32(3) - ) - BeforeEach(func() { - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "elastic-pytorch-test-", - }, - } - Expect(testK8sClient.Create(ctx, ns)) - - job = newPyTorchJobForTest(name, ns.Name) - jobKey = client.ObjectKeyFromObject(job) - workerKey = types.NamespacedName{ - Name: fmt.Sprintf("%s-worker-0", name), - Namespace: ns.Name, - } - // Define the expected elastic policy. - job.Spec.ElasticPolicy = &kubeflowv1.ElasticPolicy{ - RDZVBackend: &backendC10D, - MinReplicas: &minReplicas, - MaxReplicas: &maxReplicas, - MaxRestarts: &maxRestarts, - Metrics: []autoscalingv2.MetricSpec{ - { - Type: autoscalingv2.ResourceMetricSourceType, - Resource: &autoscalingv2.ResourceMetricSource{ - Name: corev1.ResourceCPU, - Target: autoscalingv2.MetricTarget{ - Type: autoscalingv2.UtilizationMetricType, - AverageValue: resource.NewQuantity(80, resource.DecimalSI), - }, - }, - }, - }, - } - job.Spec.PyTorchReplicaSpecs = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Image: "test-image", - Name: kubeflowv1.PyTorchJobDefaultContainerName, - Ports: []corev1.ContainerPort{ - { - Name: kubeflowv1.PyTorchJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP, - }, - }, - }, - }, - }, - }, - }, - } - }) - AfterEach(func() { - Expect(testK8sClient.Delete(ctx, job)).Should(Succeed()) - Expect(testK8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - // TODO(gaocegege): Test with more than 1 worker. 
- It("Should get the corresponding resources successfully", func() { - By("By creating a new PyTorchJob") - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PyTorchJob{} - - // We'll need to retry getting this newly created PyTorchJob, given that creation may not immediately happen. - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - pod := &corev1.Pod{} - Eventually(func() bool { - err := testK8sClient.Get(ctx, workerKey, pod) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - svc := &corev1.Service{} - Eventually(func() bool { - err := testK8sClient.Get(ctx, workerKey, svc) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - hpa := &autoscalingv2.HorizontalPodAutoscaler{} - Eventually(func() error { - return testK8sClient.Get(ctx, jobKey, hpa) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - // Check pod port. - Expect(pod.Spec.Containers[0].Ports).To(ContainElement(corev1.ContainerPort{ - Name: kubeflowv1.PyTorchJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP})) - // Check environment variables. - Expect(pod.Spec.Containers[0].Env).To(ContainElements(corev1.EnvVar{ - Name: EnvRDZVBackend, - Value: string(backendC10D), - }, corev1.EnvVar{ - Name: EnvNnodes, - Value: fmt.Sprintf("%d:%d", minReplicas, maxReplicas), - }, corev1.EnvVar{ - Name: EnvRDZVEndpoint, - Value: fmt.Sprintf("%s:%d", svc.Name, expectedPort), - }, corev1.EnvVar{ - Name: EnvMaxRestarts, - Value: fmt.Sprintf("%d", maxRestarts), - })) - Expect(svc.Spec.Ports[0].Port).To(Equal(expectedPort)) - // Check owner references. 
- trueVal := true - Expect(pod.OwnerReferences).To(ContainElement(metav1.OwnerReference{ - APIVersion: kubeflowv1.SchemeGroupVersion.String(), - Kind: kubeflowv1.PyTorchJobKind, - Name: name, - UID: created.UID, - Controller: &trueVal, - BlockOwnerDeletion: &trueVal, - })) - Expect(svc.OwnerReferences).To(ContainElement(metav1.OwnerReference{ - APIVersion: kubeflowv1.SchemeGroupVersion.String(), - Kind: kubeflowv1.PyTorchJobKind, - Name: name, - UID: created.UID, - Controller: &trueVal, - BlockOwnerDeletion: &trueVal, - })) - - // Test job status. - Eventually(func() error { - Expect(testK8sClient.Get(ctx, workerKey, pod)).Should(Succeed()) - pod.Status.Phase = corev1.PodSucceeded - return testK8sClient.Status().Update(ctx, pod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - if err != nil { - return false - } - return created.Status.ReplicaStatuses != nil && created.Status. - ReplicaStatuses[kubeflowv1.PyTorchJobReplicaTypeWorker].Succeeded == 1 - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - // Check if the job is succeeded. 
- cond := getCondition(created.Status, kubeflowv1.JobSucceeded) - Expect(cond.Status).To(Equal(corev1.ConditionTrue)) - }) - It("Should delete HPA once the PyTorchJob is suspended", func() { - By("By creating a new PyTorchJob") - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.PyTorchJob{} - hpa := &autoscalingv2.HorizontalPodAutoscaler{} - - By("Checking if the PyTorchJob and HPA are created") - Eventually(func() error { - return testK8sClient.Get(ctx, jobKey, created) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, jobKey, hpa) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - By("Suspending PyTorchJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(true) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the HPA is deleted") - Eventually(func() bool { - return errors.IsNotFound(testK8sClient.Get(ctx, jobKey, hpa)) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - }) - }) -}) - -func newPyTorchJobForTest(name, namespace string) *kubeflowv1.PyTorchJob { - return &kubeflowv1.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - } -} - -// getCondition returns the condition with the provided type. 
-func getCondition(status kubeflowv1.JobStatus, condType kubeflowv1.JobConditionType) *kubeflowv1.JobCondition { - for _, condition := range status.Conditions { - if condition.Type == condType { - return &condition - } - } - return nil -} diff --git a/pkg/controller.v1/register_controller.go b/pkg/controller.v1/register_controller.go deleted file mode 100644 index ea099ced14..0000000000 --- a/pkg/controller.v1/register_controller.go +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package controller_v1 - -import ( - "fmt" - "strings" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - jaxcontroller "github.com/kubeflow/training-operator/pkg/controller.v1/jax" - mpicontroller "github.com/kubeflow/training-operator/pkg/controller.v1/mpi" - paddlecontroller "github.com/kubeflow/training-operator/pkg/controller.v1/paddlepaddle" - pytorchcontroller "github.com/kubeflow/training-operator/pkg/controller.v1/pytorch" - tensorflowcontroller "github.com/kubeflow/training-operator/pkg/controller.v1/tensorflow" - xgboostcontroller "github.com/kubeflow/training-operator/pkg/controller.v1/xgboost" - - "sigs.k8s.io/controller-runtime/pkg/manager" -) - -const ErrTemplateSchemeNotSupported = "scheme %s is not supported yet" - -type ReconcilerSetupFunc func(manager manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc, controllerThreads int) error - -var SupportedSchemeReconciler = map[string]ReconcilerSetupFunc{ - kubeflowv1.TFJobKind: func(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc, controllerThreads int) error { - return tensorflowcontroller.NewReconciler(mgr, gangSchedulingSetupFunc).SetupWithManager(mgr, controllerThreads) - }, - kubeflowv1.PyTorchJobKind: func(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc, controllerThreads int) error { - return pytorchcontroller.NewReconciler(mgr, gangSchedulingSetupFunc).SetupWithManager(mgr, controllerThreads) - }, - kubeflowv1.XGBoostJobKind: func(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc, controllerThreads int) error { - return xgboostcontroller.NewReconciler(mgr, gangSchedulingSetupFunc).SetupWithManager(mgr, controllerThreads) - }, - kubeflowv1.MPIJobKind: func(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc, controllerThreads int) error { - return 
mpicontroller.NewReconciler(mgr, gangSchedulingSetupFunc).SetupWithManager(mgr, controllerThreads) - }, - kubeflowv1.PaddleJobKind: func(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc, controllerThreads int) error { - return paddlecontroller.NewReconciler(mgr, gangSchedulingSetupFunc).SetupWithManager(mgr, controllerThreads) - }, - kubeflowv1.JAXJobKind: func(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc, controllerThreads int) error { - return jaxcontroller.NewReconciler(mgr, gangSchedulingSetupFunc).SetupWithManager(mgr, controllerThreads) - }, -} - -type EnabledSchemes []string - -func (es *EnabledSchemes) String() string { - return strings.Join(*es, ",") -} - -func (es *EnabledSchemes) Set(kind string) error { - kind = strings.ToLower(kind) - for supportedKind := range SupportedSchemeReconciler { - if strings.ToLower(supportedKind) == kind { - *es = append(*es, supportedKind) - return nil - } - } - return fmt.Errorf(ErrTemplateSchemeNotSupported, kind) -} - -func (es *EnabledSchemes) FillAll() { - for supportedKind := range SupportedSchemeReconciler { - *es = append(*es, supportedKind) - } -} - -func (es *EnabledSchemes) Empty() bool { - return len(*es) == 0 -} diff --git a/pkg/controller.v1/register_controller_test.go b/pkg/controller.v1/register_controller_test.go deleted file mode 100644 index 91b698e006..0000000000 --- a/pkg/controller.v1/register_controller_test.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2022 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package controller_v1 - -import ( - "testing" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestEnabledSchemes(t *testing.T) { - testES := EnabledSchemes{} - - if testES.String() != "" { - t.Errorf("empty EnabledSchemes converted no-empty string %s", testES.String()) - } - - if !testES.Empty() { - t.Error("Empty method returned false for empty EnabledSchemes") - } - - if testES.Set("TFJob") != nil { - t.Error("failed to restore TFJob") - } else { - stored := false - for _, kind := range testES { - if kind == kubeflowv1.TFJobKind { - stored = true - } - } - if !stored { - t.Errorf("%s not successfully registered", kubeflowv1.TFJobKind) - } - } - - if testES.Set("mpijob") != nil { - t.Error("failed to restore PyTorchJob(pytorchjob)") - } else { - stored := false - for _, kind := range testES { - if kind == kubeflowv1.MPIJobKind { - stored = true - } - } - if !stored { - t.Errorf("%s not successfully registered", kubeflowv1.MPIJobKind) - } - } - - dummyJob := "dummyjob" - if testES.Set(dummyJob) == nil { - t.Errorf("successfully registered non-supported job %s", dummyJob) - } - - if testES.Empty() { - t.Error("Empty method returned true for non-empty EnabledSchemes") - } - - es2 := EnabledSchemes{} - es2.FillAll() - if es2.Empty() { - t.Error("Empty method returned true for fully registered EnabledSchemes") - } -} diff --git a/pkg/controller.v1/tensorflow/job_test.go b/pkg/controller.v1/tensorflow/job_test.go deleted file mode 100644 index df146ef15a..0000000000 --- a/pkg/controller.v1/tensorflow/job_test.go +++ /dev/null @@ -1,826 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tensorflow - -import ( - "context" - "fmt" - "strconv" - "time" - - "github.com/google/go-cmp/cmp/cmpopts" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - tftestutil "github.com/kubeflow/training-operator/pkg/controller.v1/tensorflow/testutil" - commonutil "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -var _ = Describe("TFJob controller", func() { - Context("Test Add TFJob", func() { - It("should get the exact TFJob", func() { - By("submitting an TFJob") - - testJobName := "test-case-12" - testNamespace := metav1.NamespaceDefault - - decoyJobName := "decoy-case-34" - - ctx := context.Background() - - tfJob := tftestutil.NewTFJob(1, 0) - tfJob.SetName(testJobName) - tfJob.SetNamespace(testNamespace) - - decoyJob := tftestutil.NewTFJob(2, 3) - decoyJob.SetName(decoyJobName) - decoyJob.SetNamespace(testNamespace) - - Expect(testK8sClient.Create(ctx, tfJob)).Should(Succeed()) - Expect(testK8sClient.Create(ctx, decoyJob)).Should(Succeed()) - - key := types.NamespacedName{ - Namespace: testNamespace, - Name: testJobName, - } - 
Eventually(func() error { - job := &kubeflowv1.TFJob{} - return reconciler.Get(ctx, key, job) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - Expect(testK8sClient.Delete(ctx, tfJob)).Should(Succeed()) - Expect(testK8sClient.Delete(ctx, decoyJob)).Should(Succeed()) - }) - }) - - Context("Test Copy Labels and Annotation", func() { - It("should copy labels and annotation from the spec to generated Pods", func() { - ctx := context.Background() - testAnnotationKey := "annotation1" - testAnnotationVal := "1" - testLabelKey := "label1" - testLabelVal := "1" - - testJobName := "test-copy-labels-anno" - tfjob := tftestutil.NewTFJob(1, 0) - tfjob.SetName(testJobName) - annotations := map[string]string{ - testAnnotationKey: testAnnotationVal, - } - labels := map[string]string{ - testLabelKey: testLabelVal, - } - tfjob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker].Template.Labels = labels - tfjob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker].Template.Annotations = annotations - - By("submitting an TFJob with specific labels and annotations") - Expect(testK8sClient.Create(ctx, tfjob)).Should(Succeed()) - - Eventually(func() error { - pod := &corev1.Pod{} - key := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: common.GenGeneralName(tfjob.Name, "worker", "0"), - } - err := testK8sClient.Get(ctx, key, pod) - if err != nil { - return err - } - - if pod.Annotations == nil { - return fmt.Errorf("annotation of %s/%s is nil", pod.GetNamespace(), pod.GetName()) - } - if val, exist := pod.Annotations[testAnnotationKey]; exist { - if val != testAnnotationVal { - return fmt.Errorf("annotation of %s not match with %s", testAnnotationKey, testAnnotationVal) - } - } else { - return fmt.Errorf("annotation %s not found", testAnnotationKey) - } - - if pod.Labels == nil { - return fmt.Errorf("label of %s/%s is nil", pod.GetNamespace(), pod.GetName()) - } - if val, exist := pod.Labels[testLabelKey]; exist { - if val != testLabelVal { - return 
fmt.Errorf("annotation of %s not match with %s", testLabelKey, testLabelVal) - } - } else { - return fmt.Errorf("label %s not found", testLabelKey) - } - - return nil - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - }) - }) - - Context("Test Delete Pods and Services", func() { - It("it should clean associated Pods and Services according to clean policy", func() { - type testCase struct { - description string - tfJob *kubeflowv1.TFJob - - pendingWorkerPods int32 - activeWorkerPods int32 - succeededWorkerPods int32 - failedWorkerPods int32 - - pendingPSPods int32 - activePSPods int32 - succeededPSPods int32 - failedPSPods int32 - - activeWorkerServices int32 - activePSServices int32 - - expectedPodRemaining int - } - - testCases := []testCase{ - { - description: "4 workers and 2 ps is running, policy is all", - tfJob: tftestutil.NewTFJobWithCleanPolicy(0, 4, 2, kubeflowv1.CleanPodPolicyAll), - - pendingWorkerPods: 0, - activeWorkerPods: 4, - succeededWorkerPods: 0, - failedWorkerPods: 0, - - pendingPSPods: 0, - activePSPods: 2, - succeededPSPods: 0, - failedPSPods: 0, - - activeWorkerServices: 4, - activePSServices: 2, - - expectedPodRemaining: 0, - }, - { - description: "4 workers and 2 ps is running, policy is running", - tfJob: tftestutil.NewTFJobWithCleanPolicy(0, 4, 2, kubeflowv1.CleanPodPolicyRunning), - - pendingWorkerPods: 0, - activeWorkerPods: 4, - succeededWorkerPods: 0, - failedWorkerPods: 0, - - pendingPSPods: 0, - activePSPods: 2, - succeededPSPods: 0, - failedPSPods: 0, - - activeWorkerServices: 4, - activePSServices: 2, - - expectedPodRemaining: 0, - }, - { - description: "4 workers and 2 ps is succeeded, policy is running", - tfJob: tftestutil.NewTFJobWithCleanPolicy(0, 4, 2, kubeflowv1.CleanPodPolicyRunning), - - pendingWorkerPods: 0, - activeWorkerPods: 0, - succeededWorkerPods: 4, - failedWorkerPods: 0, - - pendingPSPods: 0, - activePSPods: 0, - succeededPSPods: 2, - failedPSPods: 0, - - activeWorkerServices: 4, - activePSServices: 2, - 
- expectedPodRemaining: 6, - }, - { - description: "4 workers and 2 ps is succeeded, policy is None", - tfJob: tftestutil.NewTFJobWithCleanPolicy(0, 4, 2, kubeflowv1.CleanPodPolicyNone), - - pendingWorkerPods: 0, - activeWorkerPods: 0, - succeededWorkerPods: 4, - failedWorkerPods: 0, - - pendingPSPods: 0, - activePSPods: 0, - succeededPSPods: 2, - failedPSPods: 0, - - activeWorkerServices: 4, - activePSServices: 2, - - expectedPodRemaining: 6, - }, - } - - jobNameTemplate := "test-del-pod-svc-%d" - for idx, tc := range testCases { - By(fmt.Sprintf("preparing cases %s", tc.description)) - ctx := context.Background() - tc.tfJob.SetName(fmt.Sprintf(jobNameTemplate, idx)) - tc.tfJob.SetUID(uuid.NewUUID()) - commonutil.UpdateJobConditions(&tc.tfJob.Status, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobSucceededReason), "") - - refs := []metav1.OwnerReference{ - *reconciler.GenOwnerReference(tc.tfJob), - } - - basicLabels := reconciler.GenLabels(tc.tfJob.GetName()) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: basicLabels, - }) - Expect(err).Should(BeNil()) - listOpt := client.MatchingLabelsSelector{ - Selector: selector, - } - - By("creating Services and Pods with designed phases") - tftestutil.SetPodsStatuses(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypeWorker, - tc.pendingWorkerPods, tc.activeWorkerPods, tc.succeededWorkerPods, tc.failedWorkerPods, - nil, refs, basicLabels) - tftestutil.SetPodsStatuses(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypePS, - tc.pendingPSPods, tc.activePSPods, tc.succeededPSPods, tc.failedPSPods, - nil, refs, basicLabels) - - tftestutil.SetServices(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypeWorker, tc.activeWorkerServices, refs, basicLabels) - tftestutil.SetServices(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypePS, tc.activePSServices, refs, basicLabels) - - podList := &corev1.PodList{} - 
Expect(testK8sClient.List(ctx, podList, listOpt)).Should(Succeed()) - Expect(len(podList.Items)).To(Equal( - int(tc.pendingPSPods + tc.activePSPods + tc.failedPSPods + tc.succeededPSPods + - tc.pendingWorkerPods + tc.activeWorkerPods + tc.failedWorkerPods + tc.succeededWorkerPods))) - - By("calling ReconcileJob") - _ = reconciler.ReconcileJobs(tc.tfJob, tc.tfJob.Spec.TFReplicaSpecs, tc.tfJob.Status, &tc.tfJob.Spec.RunPolicy) - - podList = &corev1.PodList{} - Expect(testK8sClient.List(ctx, podList, listOpt, client.InNamespace(tc.tfJob.GetNamespace()))).Should(Succeed()) - podRemainingCount := len(podList.Items) - Expect(podRemainingCount).To(Equal(tc.expectedPodRemaining)) - - svcList := &corev1.ServiceList{} - Expect(testK8sClient.List(ctx, svcList, listOpt)).Should(Succeed()) - svcRemainingCount := len(svcList.Items) - Expect(svcRemainingCount).To(Equal(tc.expectedPodRemaining)) - } - }) - }) - - Context("Test Active Deadline Seconds", func() { - It("clean desired Pods and Services according to TFJob config", func() { - type testCase struct { - description string - tfJob *kubeflowv1.TFJob - - pendingWorkerPods int32 - activeWorkerPods int32 - succeededWorkerPods int32 - failedWorkerPods int32 - - pendingPSPods int32 - activePSPods int32 - succeededPSPods int32 - failedPSPods int32 - - activeWorkerServices int32 - activePSServices int32 - - expectedPodRemaining int - } - - ads2 := int64(2) - adsTest2 := &ads2 - testCases := []testCase{ - { - description: "4 workers and 2 ps is running, ActiveDeadlineSeconds unset", - tfJob: tftestutil.NewTFJobWithActiveDeadlineSeconds(0, 4, 2, nil), - - pendingWorkerPods: 0, - activeWorkerPods: 4, - succeededWorkerPods: 0, - failedWorkerPods: 0, - - pendingPSPods: 0, - activePSPods: 2, - succeededPSPods: 0, - failedPSPods: 0, - - activeWorkerServices: 4, - activePSServices: 2, - - expectedPodRemaining: 6, - }, - { - description: "4 workers and 2 ps is running, ActiveDeadlineSeconds is 2", - tfJob: 
tftestutil.NewTFJobWithActiveDeadlineSeconds(0, 4, 2, adsTest2), - - pendingWorkerPods: 0, - activeWorkerPods: 4, - succeededWorkerPods: 0, - failedWorkerPods: 0, - - pendingPSPods: 0, - activePSPods: 2, - succeededPSPods: 0, - failedPSPods: 0, - - activeWorkerServices: 4, - activePSServices: 2, - - expectedPodRemaining: 0, - }, - } - jobNameTemplate := "test-ads-%d" - for idx, tc := range testCases { - By(fmt.Sprintf("preparing cases %s", tc.description)) - ctx := context.Background() - tc.tfJob.SetName(fmt.Sprintf(jobNameTemplate, idx)) - tc.tfJob.SetUID(uuid.NewUUID()) - - refs := []metav1.OwnerReference{ - *reconciler.GenOwnerReference(tc.tfJob), - } - - basicLabels := reconciler.GenLabels(tc.tfJob.GetName()) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: basicLabels, - }) - Expect(err).Should(BeNil()) - listOpt := client.MatchingLabelsSelector{ - Selector: selector, - } - - By("creating Services and Pods with designed phases") - tftestutil.SetPodsStatuses(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypeWorker, - tc.pendingWorkerPods, tc.activeWorkerPods, tc.succeededWorkerPods, tc.failedWorkerPods, - nil, refs, basicLabels) - tftestutil.SetPodsStatuses(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypePS, - tc.pendingPSPods, tc.activePSPods, tc.succeededPSPods, tc.failedPSPods, - nil, refs, basicLabels) - - tftestutil.SetServices(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypeWorker, tc.activeWorkerServices, refs, basicLabels) - tftestutil.SetServices(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypePS, tc.activePSServices, refs, basicLabels) - - podList := &corev1.PodList{} - Expect(testK8sClient.List(ctx, podList, listOpt)).Should(Succeed()) - Expect(len(podList.Items)).To(Equal( - int(tc.pendingPSPods + tc.activePSPods + tc.failedPSPods + tc.succeededPSPods + - tc.pendingWorkerPods + tc.activeWorkerPods + tc.failedWorkerPods + tc.succeededWorkerPods))) - - By("waiting enough time") - now := metav1.Now() 
- tc.tfJob.Status.StartTime = &now - ads := tc.tfJob.Spec.RunPolicy.ActiveDeadlineSeconds - if ads != nil { - dur := time.Second * time.Duration(*ads) - time.Sleep(dur) - } - - By("calling ReconcileJob") - _ = reconciler.ReconcileJobs(tc.tfJob, tc.tfJob.Spec.TFReplicaSpecs, tc.tfJob.Status, &tc.tfJob.Spec.RunPolicy) - - podList = &corev1.PodList{} - Expect(testK8sClient.List(ctx, podList, listOpt, client.InNamespace(tc.tfJob.GetNamespace()))).Should(Succeed()) - podRemainingCount := len(podList.Items) - Expect(podRemainingCount).To(Equal(tc.expectedPodRemaining)) - - svcList := &corev1.ServiceList{} - Expect(testK8sClient.List(ctx, svcList, listOpt)).Should(Succeed()) - svcRemainingCount := len(svcList.Items) - Expect(svcRemainingCount).To(Equal(tc.expectedPodRemaining)) - } - }) - }) - - Context("Test Backoff For On Failure(", func() { - It("clean desired Pods and Services according to TFJob config", func() { - type testCase struct { - description string - tfJob *kubeflowv1.TFJob - - pendingWorkerPods int32 - activeWorkerPods int32 - succeededWorkerPods int32 - failedWorkerPods int32 - - restartCounts []int32 - - pendingPSPods int32 - activePSPods int32 - succeededPSPods int32 - failedPSPods int32 - - activeWorkerServices int32 - activePSServices int32 - - expectedPodRemaining int - } - - backoffLimit4 := int32(4) - backoffLimitTest4 := &backoffLimit4 - testCases := []testCase{ - { - description: "4 workers each having 1 restartCount and 2 ps is running, backoffLimit 4 ", - tfJob: tftestutil.NewTFJobWithBackoffLimit(0, 4, 2, backoffLimitTest4), - - pendingWorkerPods: 0, - activeWorkerPods: 4, - succeededWorkerPods: 0, - failedWorkerPods: 0, - - restartCounts: []int32{1, 1, 1, 1}, - - pendingPSPods: 0, - activePSPods: 2, - succeededPSPods: 0, - failedPSPods: 0, - - activeWorkerServices: 4, - activePSServices: 2, - - expectedPodRemaining: 0, - }, - } - - jobNameTemplate := "test-bof-%d" - for idx, tc := range testCases { - By(fmt.Sprintf("preparing cases %s", 
tc.description)) - ctx := context.Background() - tc.tfJob.SetName(fmt.Sprintf(jobNameTemplate, idx)) - tc.tfJob.SetUID(uuid.NewUUID()) - - refs := []metav1.OwnerReference{ - *reconciler.GenOwnerReference(tc.tfJob), - } - - basicLabels := reconciler.GenLabels(tc.tfJob.GetName()) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: basicLabels, - }) - Expect(err).Should(BeNil()) - listOpt := client.MatchingLabelsSelector{ - Selector: selector, - } - - By("creating Services and Pods with designed phases") - tftestutil.SetPodsStatuses(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypeWorker, - tc.pendingWorkerPods, tc.activeWorkerPods, tc.succeededWorkerPods, tc.failedWorkerPods, - tc.restartCounts, refs, basicLabels) - tftestutil.SetPodsStatuses(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypePS, - tc.pendingPSPods, tc.activePSPods, tc.succeededPSPods, tc.failedPSPods, - tc.restartCounts, refs, basicLabels) - - tftestutil.SetServices(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypeWorker, tc.activeWorkerServices, refs, basicLabels) - tftestutil.SetServices(testK8sClient, tc.tfJob, kubeflowv1.TFJobReplicaTypePS, tc.activePSServices, refs, basicLabels) - - podList := &corev1.PodList{} - Expect(testK8sClient.List(ctx, podList, listOpt)).Should(Succeed()) - Expect(len(podList.Items)).To(Equal( - int(tc.pendingPSPods + tc.activePSPods + tc.failedPSPods + tc.succeededPSPods + - tc.pendingWorkerPods + tc.activeWorkerPods + tc.failedWorkerPods + tc.succeededWorkerPods))) - - By("calling ReconcileJob") - _ = reconciler.ReconcileJobs(tc.tfJob, tc.tfJob.Spec.TFReplicaSpecs, tc.tfJob.Status, &tc.tfJob.Spec.RunPolicy) - - podList = &corev1.PodList{} - Expect(testK8sClient.List(ctx, podList, listOpt, client.InNamespace(tc.tfJob.GetNamespace()))).Should(Succeed()) - podRemainingCount := len(podList.Items) - Expect(podRemainingCount).To(Equal(tc.expectedPodRemaining)) - - svcList := &corev1.ServiceList{} - Expect(testK8sClient.List(ctx, 
svcList, listOpt)).Should(Succeed()) - svcRemainingCount := len(svcList.Items) - Expect(svcRemainingCount).To(Equal(tc.expectedPodRemaining)) - } - }) - }) - - Context("Test TTL Seconds After Finished", func() { - It("should delete job when expired time is up", func() { - type testCase struct { - description string - tfJob *kubeflowv1.TFJob - phase corev1.PodPhase - } - testCases := []testCase{ - { - description: "succeeded job with TTL 3s", - tfJob: tftestutil.NewTFJobWithCleanupJobDelay(0, 1, 0, ptr.To[int32](3)), - phase: corev1.PodSucceeded, - }, - { - description: "failed job with TTL 3s", - tfJob: tftestutil.NewTFJobWithCleanupJobDelay(0, 1, 0, ptr.To[int32](3)), - phase: corev1.PodFailed, - }, - } - jobNameTemplate := "test-bof-%d" - for idx, tc := range testCases { - By(fmt.Sprintf("preparing cases %s", tc.description)) - ctx := context.Background() - name := fmt.Sprintf(jobNameTemplate, idx) - tc.tfJob.SetName(name) - tc.tfJob.CreationTimestamp = metav1.Now() - - By("creating a TFJob") - Expect(reconciler.Create(ctx, tc.tfJob)).Should(Succeed()) - - // We need to wait for synchronizing cache. 
- By("getting a created TFJob") - var updatedTFJob kubeflowv1.TFJob - Eventually(func() error { - return reconciler.Get(ctx, client.ObjectKeyFromObject(tc.tfJob), &updatedTFJob) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - initializeReplicaStatuses(&updatedTFJob.Status, kubeflowv1.TFJobReplicaTypeWorker) - - By("prepare pod") - refs := []metav1.OwnerReference{ - *reconciler.GenOwnerReference(tc.tfJob), - } - pod := tftestutil.NewBasePod("pod", tc.tfJob, refs) - pod.Status.Phase = tc.phase - - By("update job replica statuses") - updateJobReplicaStatuses(&updatedTFJob.Status, kubeflowv1.TFJobReplicaTypeWorker, pod) - - By("update job status") - Expect(reconciler.UpdateJobStatus(&updatedTFJob, updatedTFJob.Spec.TFReplicaSpecs, &updatedTFJob.Status)).To(Succeed()) - By("updating job status...") - Expect(reconciler.Status().Update(ctx, &updatedTFJob)).To(Succeed()) - - By("waiting for updating replicaStatus for workers") - Eventually(func() *kubeflowv1.ReplicaStatus { - var getTFJob kubeflowv1.TFJob - Expect(reconciler.Get(ctx, client.ObjectKeyFromObject(tc.tfJob), &getTFJob)).Should(Succeed()) - return getTFJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeWorker] - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - ttl := updatedTFJob.Spec.RunPolicy.TTLSecondsAfterFinished - if ttl != nil { - dur := time.Second * time.Duration(*ttl) - time.Sleep(dur) - } - - Eventually(func() error { - tfJob := &kubeflowv1.TFJob{} - key := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: name, - } - if err := reconciler.Get(ctx, key, tfJob); err != nil { - if errors.IsNotFound(err) { - return nil - } - return err - } - return fmt.Errorf("job %s still remains", name) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - } - }) - }) -}) - -var _ = Describe("Test for controller.v1/common", func() { - var ( - ctx = context.Background() - ns *corev1.Namespace - now metav1.Time - ) - BeforeEach(func() { - ns = &corev1.Namespace{ - 
ObjectMeta: metav1.ObjectMeta{ - GenerateName: "tfjob-ns-", - }, - } - now = metav1.Now() - Expect(testK8sClient.Create(ctx, ns)).Should(Succeed()) - }) - AfterEach(func() { - Expect(testK8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - - type cleanUpCases struct { - tfJob *kubeflowv1.TFJob - runPolicy *kubeflowv1.RunPolicy - jobStatus kubeflowv1.JobStatus - wantTFJobIsRemoved bool - wantErr bool - } - DescribeTable("TFJob is created and is cleaned up", - func(tc *cleanUpCases) { - tc.tfJob.SetNamespace(ns.Name) - Expect(testK8sClient.Create(ctx, tc.tfJob)).Should(Succeed()) - - if tc.wantErr { - Expect(reconciler.CleanupJob(tc.runPolicy, tc.jobStatus, tc.tfJob)).ShouldNot(Succeed()) - } else { - Expect(reconciler.CleanupJob(tc.runPolicy, tc.jobStatus, tc.tfJob)).Should(Succeed()) - } - if tc.wantTFJobIsRemoved { - Eventually(func() bool { - gotErr := testK8sClient.Get(ctx, client.ObjectKeyFromObject(tc.tfJob), &kubeflowv1.TFJob{}) - return errors.IsNotFound(gotErr) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - } else { - Eventually(func() error { - return testK8sClient.Get(ctx, client.ObjectKeyFromObject(tc.tfJob), &kubeflowv1.TFJob{}) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - } - }, - Entry("TFJob shouldn't be removed since TTL is nil", &cleanUpCases{ - tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, nil), - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: nil, - }, - jobStatus: kubeflowv1.JobStatus{}, - wantTFJobIsRemoved: false, - wantErr: false, - }), - Entry("No error with completionTime is nil if suspended", &cleanUpCases{ - tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, nil), - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: nil, - Suspend: ptr.To(true), - }, - jobStatus: kubeflowv1.JobStatus{ - CompletionTime: nil, - }, - wantTFJobIsRemoved: false, - wantErr: false, - }), - Entry("No error with TTL is set and completionTime is nil, if suspended", &cleanUpCases{ - tfJob: 
tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, ptr.To[int32](10)), - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: ptr.To[int32](10), - Suspend: ptr.To(true), - }, - jobStatus: kubeflowv1.JobStatus{ - CompletionTime: nil, - }, - wantTFJobIsRemoved: false, - wantErr: false, - }), - Entry("Error is occurred since completionTime is nil", &cleanUpCases{ - tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, ptr.To[int32](10)), - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: ptr.To[int32](10), - }, - jobStatus: kubeflowv1.JobStatus{ - CompletionTime: nil, - }, - wantTFJobIsRemoved: false, - wantErr: true, - }), - Entry("TFJob is removed since exceeded TTL (TTL is 180s)", &cleanUpCases{ - tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, ptr.To[int32](180)), - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: ptr.To[int32](180), - }, - jobStatus: kubeflowv1.JobStatus{ - CompletionTime: &metav1.Time{ - Time: now.AddDate(0, 0, -1), - }, - }, - wantTFJobIsRemoved: true, - wantErr: false, - }), - Entry("TFJob is removed since (TTL is 0s)", &cleanUpCases{ - tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, ptr.To[int32](0)), - runPolicy: &kubeflowv1.RunPolicy{ - TTLSecondsAfterFinished: ptr.To[int32](0), - }, - jobStatus: kubeflowv1.JobStatus{ - CompletionTime: &now, - }, - wantTFJobIsRemoved: true, - wantErr: false, - }), - ) - - type createServiceCases struct { - tfJob *kubeflowv1.TFJob - rType kubeflowv1.ReplicaType - spec *kubeflowv1.ReplicaSpec - uid types.UID - index int - wantErr bool - } - DescribeTable("CreateNewService", - func(tc *createServiceCases) { - tc.tfJob.SetUID(tc.uid) - tc.tfJob.SetNamespace(ns.Name) - - gotErr := reconciler.CreateNewService(tc.tfJob, tc.rType, tc.spec, strconv.Itoa(tc.index)) - if tc.wantErr { - Expect(gotErr).ShouldNot(Succeed()) - } else { - Expect(gotErr).Should(Succeed()) - - svcInternalTPC := corev1.ServiceInternalTrafficPolicyCluster - svcSingleStack := 
corev1.IPFamilyPolicySingleStack - wantSvc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%d", tc.tfJob.Name, tc.rType, tc.index), - Namespace: ns.Name, - OwnerReferences: []metav1.OwnerReference{ - *reconciler.GenOwnerReference(tc.tfJob), - }, - Labels: map[string]string{ - kubeflowv1.JobNameLabel: tc.tfJob.Name, - kubeflowv1.OperatorNameLabel: controllerName, - kubeflowv1.ReplicaIndexLabel: strconv.Itoa(tc.index), - kubeflowv1.ReplicaTypeLabel: "", - }, - }, - Spec: corev1.ServiceSpec{ - Ports: []corev1.ServicePort{ - { - Name: kubeflowv1.TFJobDefaultPortName, - Protocol: corev1.ProtocolTCP, - Port: kubeflowv1.TFJobDefaultPort, - TargetPort: intstr.IntOrString{ - IntVal: kubeflowv1.TFJobDefaultPort, - }, - }, - }, - Selector: map[string]string{ - kubeflowv1.JobNameLabel: tc.tfJob.Name, - kubeflowv1.OperatorNameLabel: controllerName, - kubeflowv1.ReplicaIndexLabel: strconv.Itoa(tc.index), - kubeflowv1.ReplicaTypeLabel: "", - }, - ClusterIP: corev1.ClusterIPNone, - Type: corev1.ServiceTypeClusterIP, - ClusterIPs: []string{corev1.ClusterIPNone}, - SessionAffinity: corev1.ClusterIPNone, - IPFamilies: []corev1.IPFamily{corev1.IPv4Protocol}, - IPFamilyPolicy: &svcSingleStack, - InternalTrafficPolicy: &svcInternalTPC, - }, - } - Eventually(func() *corev1.Service { - svc := &corev1.Service{} - Expect(testK8sClient.Get(ctx, client.ObjectKeyFromObject(wantSvc), svc)).Should(Succeed()) - return svc - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo(wantSvc, - cmpopts.IgnoreFields(metav1.ObjectMeta{}, "UID", "ResourceVersion", "Generation", "CreationTimestamp", "ManagedFields"))) - } - }, - Entry("Failed to create service since containerPort is missing", &createServiceCases{ - tfJob: tftestutil.NewTFJobV2(2, 0, 0, 1, 0), - spec: &kubeflowv1.ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: kubeflowv1.TFJobDefaultContainerName, - }, - }, - }, - }, - }, - index: 
0, - wantErr: true, - }), - Entry("Failed to create service since Job's ownerReference is invalid", &createServiceCases{ - tfJob: tftestutil.NewTFJobV2(2, 0, 0, 1, 0), - spec: &kubeflowv1.ReplicaSpec{Template: tftestutil.NewTFReplicaSpecTemplate()}, - index: 1, - wantErr: true, - }), - Entry("Succeeded to create service", &createServiceCases{ - tfJob: tftestutil.NewTFJobV2(2, 0, 0, 1, 0), - spec: &kubeflowv1.ReplicaSpec{Template: tftestutil.NewTFReplicaSpecTemplate()}, - index: 0, - wantErr: false, - uid: uuid.NewUUID(), - }), - ) -}) diff --git a/pkg/controller.v1/tensorflow/pod_test.go b/pkg/controller.v1/tensorflow/pod_test.go deleted file mode 100644 index d813cde005..0000000000 --- a/pkg/controller.v1/tensorflow/pod_test.go +++ /dev/null @@ -1,592 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tensorflow - -import ( - "context" - "fmt" - "os" - - "github.com/google/go-cmp/cmp/cmpopts" - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/uuid" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - tftestutil "github.com/kubeflow/training-operator/pkg/controller.v1/tensorflow/testutil" - "github.com/kubeflow/training-operator/pkg/core" - commonutil "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -var _ = Describe("TFJob controller", func() { - Context("Test ClusterSpec", func() { - It("should generate desired cluster spec", func() { - type tc struct { - tfJob *kubeflowv1.TFJob - rt string - index string - customClusterDomain string - expectedClusterSpec string - } - testCase := []tc{ - { - tfJob: tftestutil.NewTFJobWithNamespace(1, 0, "ns0"), - rt: "worker", - index: "0", - customClusterDomain: "", - expectedClusterSpec: "", - }, - { - tfJob: tftestutil.NewTFJobWithNamespace(1, 0, "ns1"), - rt: "worker", - index: "0", - customClusterDomain: "tf.training.com", - expectedClusterSpec: "", - }, - { - tfJob: tftestutil.NewTFJobWithNamespace(1, 1, "ns2"), - rt: "worker", - index: "0", - customClusterDomain: "tf.training.org", - expectedClusterSpec: `{"cluster":{"ps":["` + tftestutil.TestTFJobName + - `-ps-0.ns2.svc.tf.training.org:2222"],"worker":["` + tftestutil.TestTFJobName + - `-worker-0.ns2.svc.tf.training.org:2222"]},"task":{"type":"worker","index":0},"environment":"cloud"}`, - }, - { - tfJob: tftestutil.NewTFJobWithEvaluatorAndNamespace(1, 1, 1, "ns3"), - rt: "worker", - index: "0", - customClusterDomain: "tf.training.io", - expectedClusterSpec: `{"cluster":{"evaluator":["` + tftestutil.TestTFJobName + - `-evaluator-0.ns3.svc.tf.training.io:2222"],"ps":["` + tftestutil.TestTFJobName + - `-ps-0.ns3.svc.tf.training.io:2222"],"worker":["` + 
tftestutil.TestTFJobName + - `-worker-0.ns3.svc.tf.training.io:2222"]},"task":{"type":"worker","index":0},"environment":"cloud"}`, - }, - { - tfJob: tftestutil.NewTFJobWithEvaluatorAndNamespace(1, 1, 1, "ns3"), - rt: "worker", - index: "0", - customClusterDomain: "", - expectedClusterSpec: `{"cluster":{"evaluator":["` + tftestutil.TestTFJobName + - `-evaluator-0.ns3.svc:2222"],"ps":["` + tftestutil.TestTFJobName + - `-ps-0.ns3.svc:2222"],"worker":["` + tftestutil.TestTFJobName + - `-worker-0.ns3.svc:2222"]},"task":{"type":"worker","index":0},"environment":"cloud"}`, - }, - } - - for _, c := range testCase { - c.tfJob.SetName(tftestutil.TestTFJobName) - c.tfJob.SetUID(uuid.NewUUID()) - _ = os.Setenv(EnvCustomClusterDomain, c.customClusterDomain) - - podTemplate := c.tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker].Template.DeepCopy() - - podTemplate.Name = core.GenGeneralName(c.tfJob.GetName(), c.rt, c.index) - - if podTemplate.Labels == nil { - podTemplate.Labels = map[string]string{} - } - - jobName := c.tfJob.GetName() - labels := reconciler.GenLabels(jobName) - labels[kubeflowv1.ReplicaTypeLabel] = c.rt - labels[kubeflowv1.ReplicaIndexLabel] = c.index - - Expect(reconciler.SetClusterSpec(c.tfJob, podTemplate, c.rt, c.index)).Should(Succeed()) - - if c.expectedClusterSpec == "" { - Expect(len(podTemplate.Spec.Containers[0].Env)).Should(Equal(0)) - } else { - actual := podTemplate.Spec.Containers[0].Env[0].Value - reconciler.Log.Info("printing cluster spec", "expected", c.expectedClusterSpec, "actual pod", podTemplate) - Expect(actual).Should(Equal(c.expectedClusterSpec)) - } - } - }) - }) - - Context("Test IsDistributed", func() { - It("should returns correctly", func() { - type tc struct { - tfJob *kubeflowv1.TFJob - expected bool - } - testCase := []tc{ - { - tfJob: tftestutil.NewTFJob(1, 0), - expected: false, - }, - { - tfJob: tftestutil.NewTFJob(1, 1), - expected: true, - }, - { - tfJob: tftestutil.NewTFJob(0, 1), - expected: false, - }, - { - 
tfJob: tftestutil.NewTFJobWithChief(1, 0), - expected: true, - }, - } - for _, c := range testCase { - Expect(isDistributed(c.tfJob)).To(Equal(c.expected)) - } - }) - }) - - Context("Test Restart Policy", func() { - It("should assign proper restart policy to pod", func() { - type tc struct { - tfJob *kubeflowv1.TFJob - expectedRestartPolicy corev1.RestartPolicy - expectedType kubeflowv1.ReplicaType - } - testCase := []tc{ - func() tc { - tfJob := tftestutil.NewTFJob(1, 0) - specRestartPolicy := kubeflowv1.RestartPolicyExitCode - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker].RestartPolicy = specRestartPolicy - return tc{ - tfJob: tfJob, - expectedRestartPolicy: corev1.RestartPolicyNever, - expectedType: kubeflowv1.TFJobReplicaTypeWorker, - } - }(), - func() tc { - tfJob := tftestutil.NewTFJob(1, 0) - specRestartPolicy := kubeflowv1.RestartPolicyNever - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker].RestartPolicy = specRestartPolicy - return tc{ - tfJob: tfJob, - expectedRestartPolicy: corev1.RestartPolicyNever, - expectedType: kubeflowv1.TFJobReplicaTypeWorker, - } - }(), - func() tc { - tfJob := tftestutil.NewTFJob(1, 0) - specRestartPolicy := kubeflowv1.RestartPolicyAlways - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker].RestartPolicy = specRestartPolicy - return tc{ - tfJob: tfJob, - expectedRestartPolicy: corev1.RestartPolicyAlways, - expectedType: kubeflowv1.TFJobReplicaTypeWorker, - } - }(), - func() tc { - tfJob := tftestutil.NewTFJob(1, 0) - specRestartPolicy := kubeflowv1.RestartPolicyOnFailure - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker].RestartPolicy = specRestartPolicy - return tc{ - tfJob: tfJob, - expectedRestartPolicy: corev1.RestartPolicyOnFailure, - expectedType: kubeflowv1.TFJobReplicaTypeWorker, - } - }(), - } - for _, c := range testCase { - spec := c.tfJob.Spec.TFReplicaSpecs[c.expectedType] - podTemplate := spec.Template - setRestartPolicy(&podTemplate, spec) - 
Expect(podTemplate.Spec.RestartPolicy).To(Equal(c.expectedRestartPolicy)) - } - }) - }) - - Context("Test Exit Code", func() { - It("should delete designated Pod", func() { - By("Creating TFJob \"test-exit-code\" with 1 worker only") - ctx := context.Background() - - tfJob := tftestutil.NewTFJob(1, 0) - tfJob.SetName("test-exit-code") - tfJob.SetUID(uuid.NewUUID()) - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker].RestartPolicy = kubeflowv1.RestartPolicyExitCode - - refs := []metav1.OwnerReference{ - *reconciler.GenOwnerReference(tfJob), - } - By("creating worker Pod") - pod := tftestutil.NewPod(tfJob, kubeflowv1.TFJobReplicaTypeWorker, 0, refs) - basicLabels := reconciler.GenLabels(tfJob.GetName()) - for k, v := range basicLabels { - pod.Labels[k] = v - } - pod.Spec.Containers = append(pod.Spec.Containers, corev1.Container{ - Name: kubeflowv1.TFJobDefaultContainerName, - Image: tftestutil.DummyContainerImage, - }) - Expect(testK8sClient.Create(ctx, pod)).Should(Succeed()) - - created := &corev1.Pod{} - key := types.NamespacedName{Namespace: metav1.NamespaceDefault, Name: pod.GetName()} - Expect(testK8sClient.Get(ctx, key, created)).Should(Succeed()) - created.Status.Phase = corev1.PodFailed - created.Status.ContainerStatuses = append(created.Status.ContainerStatuses, corev1.ContainerStatus{ - Name: kubeflowv1.TFJobDefaultContainerName, - State: corev1.ContainerState{ - Terminated: &corev1.ContainerStateTerminated{ - ExitCode: 130, - }, - }, - }) - Expect(testK8sClient.Status().Update(ctx, created)) - - // Make sure the version of pod created is updated with desired status - Eventually(func() error { - updated := &corev1.Pod{} - if err := testK8sClient.Get(ctx, key, updated); err != nil { - return err - } - if updated.Status.Phase != corev1.PodFailed { - return fmt.Errorf("pod status is not Failed") - } - return nil - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - _ = reconciler.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, 
&tfJob.Spec.RunPolicy) - - Eventually(func() bool { - noPod := &corev1.Pod{} - err := testK8sClient.Get(ctx, key, noPod) - if err == nil { - reconciler.Log.Info("still got pod", "jobName", tfJob.GetName(), "pod", noPod) - return noPod.GetDeletionTimestamp() != nil - } - return errors.IsNotFound(err) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - }) - }) - - Context("Test Unretryable Exit Code", func() { - It("should set the job status to Failed", func() { - By("Creating TFJob \"test-noretry-exit-code\" with 1 worker only") - ctx := context.Background() - - tfJob := tftestutil.NewTFJob(1, 0) - tfJob.SetName("test-noretry-exit-code") - tfJob.SetUID(uuid.NewUUID()) - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker].RestartPolicy = kubeflowv1.RestartPolicyExitCode - Expect(testK8sClient.Create(ctx, tfJob)).Should(Succeed()) - - _ = reconciler.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy) - - created := &corev1.Pod{} - key := types.NamespacedName{Namespace: metav1.NamespaceDefault, Name: "test-noretry-exit-code-worker-0"} - Expect(testK8sClient.Get(ctx, key, created)).Should(Succeed()) - created.Status.Phase = corev1.PodFailed - created.Status.ContainerStatuses = append(created.Status.ContainerStatuses, corev1.ContainerStatus{ - Name: kubeflowv1.TFJobDefaultContainerName, - State: corev1.ContainerState{ - Terminated: &corev1.ContainerStateTerminated{ - ExitCode: 1, - }, - }, - }) - Expect(testK8sClient.Status().Update(ctx, created)).Should(Succeed()) - - _ = reconciler.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy) - - Eventually(func(g Gomega) { - updatedJob := &kubeflowv1.TFJob{} - g.Expect(testK8sClient.Get(ctx, types.NamespacedName{Name: tfJob.GetName(), Namespace: metav1.NamespaceDefault}, updatedJob)).Should(Succeed()) - g.Expect(updatedJob.Status.Conditions).Should(ContainElements(BeComparableTo(kubeflowv1.JobCondition{ - Type: kubeflowv1.JobFailed, - Status: 
corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobFailedReason), - Message: fmt.Sprintf("job %q is failing because %q replica(s) failed.", updatedJob.Name, kubeflowv1.TFJobReplicaTypeWorker), - }, cmpopts.IgnoreFields(kubeflowv1.JobCondition{}, "LastUpdateTime", "LastTransitionTime"))), "TFJob should be in Failed state") - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - }) - }) - - Describe("Test Scale Down", func() { - It("should delete redundant Pods", func() { - ctx := context.Background() - - tfJob := tftestutil.NewTFJob(2, 0) - //tfJob.SelfLink = "/api/v1/namespaces/default/tfjob/test-tfjob" - tfJob.SetName("test-scale-down") - tfJob.SetUID(uuid.NewUUID()) - tfJob.Spec.EnableDynamicWorker = true - - refs := []metav1.OwnerReference{*reconciler.GenOwnerReference(tfJob)} - - pods := []*corev1.Pod{ - tftestutil.NewPod(tfJob, kubeflowv1.TFJobReplicaTypeWorker, 0, refs), - tftestutil.NewPod(tfJob, kubeflowv1.TFJobReplicaTypeWorker, 1, refs), - tftestutil.NewPod(tfJob, kubeflowv1.TFJobReplicaTypeWorker, 2, refs), - } - - for i := range pods { - pod := pods[i] - for k, v := range reconciler.GenLabels(tfJob.GetName()) { - pod.Labels[k] = v - } - Expect(testK8sClient.Create(ctx, pod)).Should(Succeed()) - } - - // Ensure the created Pods are all in cache - Eventually(func() error { - podList := &corev1.PodList{} - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: reconciler.GenLabels(tfJob.GetName()), - }) - if err != nil { - return err - } - listOpt := client.MatchingLabelsSelector{ - Selector: selector, - } - err = testK8sClient.List(ctx, podList, listOpt) - if err != nil { - return err - } - if len(podList.Items) != 3 { - return fmt.Errorf("expecting %d Pods while got %d", 3, len(podList.Items)) - } - return nil - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - _ = reconciler.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy) - - noKey 
:= types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: pods[2].GetName(), - } - Eventually(func() bool { - noPod := &corev1.Pod{} - err := testK8sClient.Get(ctx, noKey, noPod) - if err == nil { - return false - } - return errors.IsNotFound(err) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - }) - }) - - Describe("Test Scale Up", func() { - It("should create missing Pods", func() { - ctx := context.Background() - - tfJob := tftestutil.NewTFJob(3, 0) - tfJob.SetName("test-scale-up") - tfJob.SetUID(uuid.NewUUID()) - tfJob.Spec.EnableDynamicWorker = true - - refs := []metav1.OwnerReference{*reconciler.GenOwnerReference(tfJob)} - - pods := []*corev1.Pod{ - tftestutil.NewPod(tfJob, kubeflowv1.TFJobReplicaTypeWorker, 0, refs), - } - - for i := range pods { - pod := pods[i] - for k, v := range reconciler.GenLabels(tfJob.GetName()) { - pod.Labels[k] = v - } - Expect(testK8sClient.Create(ctx, pod)).Should(Succeed()) - } - - // Ensure the created Pods are all in cache - Eventually(func() error { - podList := &corev1.PodList{} - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: reconciler.GenLabels(tfJob.GetName()), - }) - if err != nil { - return err - } - listOpt := client.MatchingLabelsSelector{ - Selector: selector, - } - err = testK8sClient.List(ctx, podList, listOpt) - if err != nil { - return err - } - if len(podList.Items) != 1 { - return fmt.Errorf("before reconciling, expecting %d Pods while got %d", 1, len(podList.Items)) - } - return nil - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - _ = reconciler.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy) - - // Check if there are two more Pods created - Eventually(func() error { - podList := &corev1.PodList{} - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: reconciler.GenLabels(tfJob.GetName()), - }) - if err != nil { - return err - } - listOpt := 
client.MatchingLabelsSelector{ - Selector: selector, - } - err = testK8sClient.List(ctx, podList, listOpt) - if err != nil { - return err - } - if len(podList.Items) != 3 { - return fmt.Errorf("after reconciling, expecting %d Pods while got %d", 3, len(podList.Items)) - } - return nil - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - }) - }) - - Describe("TestIsWorker0Completed", func() { - It("should match expected result", func() { - newInt32 := func(in int32) *int32 { - return &in - } - tests := []struct { - // worker failed, succeeded, running num - workers [3]int32 - tfJob *kubeflowv1.TFJob - replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec - expected bool - expectedErr bool - }{ - { - workers: [3]int32{0, 0, 1}, - tfJob: tftestutil.NewTFJobV2(1, 1, 0, 0, 0), - expected: false, - expectedErr: false, - replicas: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.TFJobReplicaTypeWorker: { - Replicas: newInt32(1), - Template: tftestutil.NewTFReplicaSpecTemplate(), - }, - kubeflowv1.TFJobReplicaTypePS: { - Replicas: newInt32(1), - Template: tftestutil.NewTFReplicaSpecTemplate(), - }, - }, - }, - { - workers: [3]int32{0, 1, 0}, - tfJob: tftestutil.NewTFJobV2(1, 0, 0, 0, 0), - expected: true, - expectedErr: false, - replicas: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.TFJobReplicaTypeWorker: { - Replicas: newInt32(1), - Template: tftestutil.NewTFReplicaSpecTemplate(), - }, - }, - }, - { - workers: [3]int32{0, 0, 0}, - tfJob: tftestutil.NewTFJobV2(0, 0, 1, 0, 0), - expected: true, - expectedErr: false, - replicas: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.TFJobReplicaTypeMaster: { - Replicas: newInt32(1), - Template: tftestutil.NewTFReplicaSpecTemplate(), - }, - }, - }, - { - workers: [3]int32{0, 0, 0}, - tfJob: tftestutil.NewTFJobV2(0, 0, 0, 1, 0), - expected: true, - expectedErr: false, - replicas: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.TFJobReplicaTypeChief: { 
- Replicas: newInt32(1), - Template: tftestutil.NewTFReplicaSpecTemplate(), - }, - }, - }, - { - workers: [3]int32{1, 1, 0}, - tfJob: tftestutil.NewTFJobV2(2, 0, 0, 0, 0), - expected: true, - expectedErr: false, - replicas: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.TFJobReplicaTypeWorker: { - Replicas: newInt32(2), - Template: tftestutil.NewTFReplicaSpecTemplate(), - }, - }, - }, - { - workers: [3]int32{1, 0, 1}, - tfJob: tftestutil.NewTFJobV2(2, 0, 0, 0, 0), - expected: false, - expectedErr: false, - replicas: map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.TFJobReplicaTypeWorker: { - Replicas: newInt32(2), - Template: tftestutil.NewTFReplicaSpecTemplate(), - }, - }, - }, - } - - jobNameTemplate := "test-worker0-complete-%d" - for i, tt := range tests { - tt.tfJob.SetName(fmt.Sprintf(jobNameTemplate, i)) - tt.tfJob.SetUID(uuid.NewUUID()) - // only related to worker status - initializeReplicaStatuses(&tt.tfJob.Status, kubeflowv1.TFJobReplicaTypeWorker) - // set status and add pod to indexer - setStatusForTest(tt.tfJob, kubeflowv1.TFJobReplicaTypeWorker, tt.workers[0], tt.workers[1], tt.workers[2], false, true, testK8sClient) - - // Adding this section to make sure all pods are created and cached - Eventually(func() error { - podList := &corev1.PodList{} - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: reconciler.GenLabels(tt.tfJob.GetName()), - }) - if err != nil { - return err - } - listOpt := client.MatchingLabelsSelector{ - Selector: selector, - } - err = testK8sClient.List(context.Background(), podList, listOpt) - if err != nil { - return nil - } - totalExpectedPodCount := tt.workers[0] + tt.workers[1] + tt.workers[2] - if len(podList.Items) != int(totalExpectedPodCount) { - return fmt.Errorf("pod number (%d) for %s not match for expected pod number %d", - len(podList.Items), tt.tfJob.GetName(), totalExpectedPodCount) - } - return nil - }, testutil.Timeout, 
testutil.Interval).Should(BeNil()) - - got, err := reconciler.IsWorker0Completed(tt.tfJob, tt.replicas) - - if err != nil { - Expect(err).To(Equal(tt.expectedErr)) - } else { - Expect(got).To(Equal(tt.expected)) - } - } - }) - }) -}) diff --git a/pkg/controller.v1/tensorflow/status_test.go b/pkg/controller.v1/tensorflow/status_test.go deleted file mode 100644 index aa17143b90..0000000000 --- a/pkg/controller.v1/tensorflow/status_test.go +++ /dev/null @@ -1,598 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tensorflow - -import ( - "context" - "fmt" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/uuid" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - tftestutil "github.com/kubeflow/training-operator/pkg/controller.v1/tensorflow/testutil" - "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -var _ = Describe("TFJob controller", func() { - Context("Test Failed", func() { - It("should update TFJob with failed status", func() { - By("creating a TFJob with replicaStatues initialized") - tfJob := tftestutil.NewTFJob(3, 0) - initializeReplicaStatuses(&tfJob.Status, kubeflowv1.TFJobReplicaTypeWorker) - - By("prepare pod") - refs := []metav1.OwnerReference{ - *reconciler.GenOwnerReference(tfJob), - } - pod := tftestutil.NewBasePod("pod", tfJob, refs) - pod.Status.Phase = v1.PodFailed - - By("update job replica statuses") - updateJobReplicaStatuses(&tfJob.Status, kubeflowv1.TFJobReplicaTypeWorker, pod) - Expect(tfJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeWorker].Failed).Should(Equal(int32(1))) - - By("update job status") - Expect(reconciler.UpdateJobStatus(tfJob, tfJob.Spec.TFReplicaSpecs, &tfJob.Status)).To(Succeed()) - - By("finding failed job status") - found := false - for _, condition := range tfJob.Status.Conditions { - if condition.Type == kubeflowv1.JobFailed { - found = true - } - } - Expect(found).To(BeTrue()) - }) - }) - - Context("Test Status", func() { - It("should update TFJob with desired status", func() { - type testCase struct { - description string - tfJob *kubeflowv1.TFJob - - expectedFailedPS int32 - expectedSucceededPS int32 - expectedActivePS int32 - - expectedFailedWorker int32 - expectedSucceededWorker int32 - expectedActiveWorker int32 - - expectedFailedChief int32 - expectedSucceededChief int32 - 
expectedActiveChief int32 - - restart bool - worker0Completed bool - - expectedType kubeflowv1.JobConditionType - } - - testCases := []testCase{ - { - description: "Chief worker is succeeded", - tfJob: tftestutil.NewTFJobWithChief(1, 0), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 0, - expectedFailedWorker: 0, - expectedSucceededWorker: 1, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 1, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobSucceeded, - }, - { - description: "Chief worker is running", - tfJob: tftestutil.NewTFJobWithChief(1, 0), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 0, - expectedFailedWorker: 0, - expectedSucceededWorker: 0, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 1, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobRunning, - }, - { - description: "Chief worker is failed", - tfJob: tftestutil.NewTFJobWithChief(1, 0), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 0, - expectedFailedWorker: 0, - expectedSucceededWorker: 0, - expectedActiveWorker: 0, - expectedFailedChief: 1, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobFailed, - }, - { - description: "(No chief worker) Worker is failed", - tfJob: tftestutil.NewTFJob(1, 0), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 0, - expectedFailedWorker: 1, - expectedSucceededWorker: 0, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobFailed, - }, - { - description: "(No chief worker) Worker is succeeded", - tfJob: tftestutil.NewTFJob(1, 0), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 0, - 
expectedFailedWorker: 0, - expectedSucceededWorker: 1, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobSucceeded, - }, - { - description: "(No chief worker) Worker is running", - tfJob: tftestutil.NewTFJob(1, 0), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 0, - expectedFailedWorker: 0, - expectedSucceededWorker: 0, - expectedActiveWorker: 1, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobRunning, - }, - { - description: "(No chief worker) 2 workers are succeeded, 2 workers are active", - tfJob: tftestutil.NewTFJob(4, 2), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 2, - expectedFailedWorker: 0, - expectedSucceededWorker: 2, - expectedActiveWorker: 2, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobRunning, - }, - { - description: "(No chief worker) 2 workers are running, 2 workers are failed", - tfJob: tftestutil.NewTFJob(4, 2), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 2, - expectedFailedWorker: 2, - expectedSucceededWorker: 0, - expectedActiveWorker: 2, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobFailed, - }, - { - description: "(No chief worker) 2 workers are succeeded, 2 workers are failed", - tfJob: tftestutil.NewTFJob(4, 2), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 2, - expectedFailedWorker: 2, - expectedSucceededWorker: 2, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: 
kubeflowv1.JobFailed, - }, - { - description: "(No chief worker) worker-0 are succeeded, 3 workers are active", - tfJob: tftestutil.NewTFJob(4, 2), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 2, - expectedFailedWorker: 0, - expectedSucceededWorker: 1, - expectedActiveWorker: 3, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: true, - expectedType: kubeflowv1.JobSucceeded, - }, - { - description: "(No chief worker, successPolicy: AllWorkers) worker-0 are succeeded, 3 workers are active", - tfJob: tftestutil.NewTFJobWithSuccessPolicy(4, 0, kubeflowv1.SuccessPolicyAllWorkers), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 0, - expectedFailedWorker: 0, - expectedSucceededWorker: 1, - expectedActiveWorker: 3, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: true, - expectedType: kubeflowv1.JobRunning, - }, - { - description: "(No chief worker, successPolicy: AllWorkers) 4 workers are succeeded", - tfJob: tftestutil.NewTFJobWithSuccessPolicy(4, 0, kubeflowv1.SuccessPolicyAllWorkers), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 0, - expectedFailedWorker: 0, - expectedSucceededWorker: 4, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: true, - expectedType: kubeflowv1.JobSucceeded, - }, - { - description: "(No chief worker, successPolicy: AllWorkers) worker-0 is succeeded, 2 workers are running, 1 worker is failed", - tfJob: tftestutil.NewTFJobWithSuccessPolicy(4, 0, kubeflowv1.SuccessPolicyAllWorkers), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 0, - expectedFailedWorker: 1, - expectedSucceededWorker: 1, - expectedActiveWorker: 2, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - 
worker0Completed: true, - expectedType: kubeflowv1.JobFailed, - }, - { - description: "Chief is running, workers are failed", - tfJob: tftestutil.NewTFJobWithChief(4, 2), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 2, - expectedFailedWorker: 4, - expectedSucceededWorker: 0, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 1, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobRunning, - }, - { - description: "Chief is running, workers are succeeded", - tfJob: tftestutil.NewTFJobWithChief(4, 2), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 2, - expectedFailedWorker: 0, - expectedSucceededWorker: 4, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 1, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobRunning, - }, - { - description: "Chief is running, a PS is failed", - tfJob: tftestutil.NewTFJobWithChief(4, 2), - expectedFailedPS: 1, - expectedSucceededPS: 0, - expectedActivePS: 1, - expectedFailedWorker: 0, - expectedSucceededWorker: 4, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 0, - expectedActiveChief: 1, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobFailed, - }, - { - description: "Chief is failed, workers are succeeded", - tfJob: tftestutil.NewTFJobWithChief(4, 2), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 2, - expectedFailedWorker: 0, - expectedSucceededWorker: 4, - expectedActiveWorker: 0, - expectedFailedChief: 1, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobFailed, - }, - { - description: "Chief is succeeded, workers are failed", - tfJob: tftestutil.NewTFJobWithChief(4, 2), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 2, - expectedFailedWorker: 4, - 
expectedSucceededWorker: 0, - expectedActiveWorker: 0, - expectedFailedChief: 0, - expectedSucceededChief: 1, - expectedActiveChief: 0, - restart: false, - worker0Completed: false, - expectedType: kubeflowv1.JobSucceeded, - }, - { - description: "Chief is failed and restarting", - tfJob: tftestutil.NewTFJobWithChief(4, 2), - expectedFailedPS: 0, - expectedSucceededPS: 0, - expectedActivePS: 2, - expectedFailedWorker: 4, - expectedSucceededWorker: 0, - expectedActiveWorker: 0, - expectedFailedChief: 1, - expectedSucceededChief: 0, - expectedActiveChief: 0, - restart: true, - worker0Completed: false, - expectedType: kubeflowv1.JobRestarting, - }, - } - - jobNameTemplate := "test-status-%d" - for i, c := range testCases { - reconciler.Log.Info("testing case", "description", c.description) - c.tfJob.SetName(fmt.Sprintf(jobNameTemplate, i)) - c.tfJob.SetUID(uuid.NewUUID()) - - initializeReplicaStatuses(&c.tfJob.Status, kubeflowv1.TFJobReplicaTypeWorker) - initializeReplicaStatuses(&c.tfJob.Status, kubeflowv1.TFJobReplicaTypeChief) - initializeReplicaStatuses(&c.tfJob.Status, kubeflowv1.TFJobReplicaTypePS) - - setStatusForTest(c.tfJob, kubeflowv1.TFJobReplicaTypePS, c.expectedFailedPS, c.expectedSucceededPS, c.expectedActivePS, c.restart, c.worker0Completed, testK8sClient) - setStatusForTest(c.tfJob, kubeflowv1.TFJobReplicaTypeWorker, c.expectedFailedWorker, c.expectedSucceededWorker, c.expectedActiveWorker, c.restart, c.worker0Completed, testK8sClient) - setStatusForTest(c.tfJob, kubeflowv1.TFJobReplicaTypeChief, c.expectedFailedChief, c.expectedSucceededChief, c.expectedActiveChief, c.restart, c.worker0Completed, testK8sClient) - - // Adding this section to make sure all pods are created and cached - Eventually(func() error { - podList := &corev1.PodList{} - basicLabels := reconciler.GenLabels(c.tfJob.GetName()) - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: basicLabels, - }) - if err != nil { - return err - } - listOpt := 
client.MatchingLabelsSelector{ - Selector: selector, - } - err = testK8sClient.List(context.Background(), podList, listOpt) - if err != nil { - return nil - } - totalExpectedPodCount := c.expectedFailedPS + c.expectedSucceededPS + c.expectedActivePS + - c.expectedFailedWorker + c.expectedSucceededWorker + c.expectedActiveWorker + - c.expectedFailedChief + c.expectedSucceededChief + c.expectedActiveChief - if len(podList.Items) != int(totalExpectedPodCount) { - return fmt.Errorf("pod number (%d) for %s not match for expected pod number %d", - len(podList.Items), c.tfJob.GetName(), totalExpectedPodCount) - } - return nil - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - _ = reconciler.ReconcileJobs(c.tfJob, c.tfJob.Spec.TFReplicaSpecs, c.tfJob.Status, &c.tfJob.Spec.RunPolicy) - - Expect(filterOutConditionTest(c.tfJob.Status)).Should(Succeed()) - - reconciler.Log.Info("checking status", "tfJob.Status", c.tfJob.Status) - found := false - for _, condition := range c.tfJob.Status.Conditions { - if condition.Type == c.expectedType { - found = true - } - } - Expect(found).To(BeTrue()) - reconciler.Log.Info("passed!", - "job name", c.tfJob.GetName(), "job description", c.description) - } - }) - }) -}) - -func setStatusForTest(tfJob *kubeflowv1.TFJob, rtype kubeflowv1.ReplicaType, failed, succeeded, active int32, restart bool, worker0Completed bool, client client.Client) { - if restart == true { - tfJob.Spec.TFReplicaSpecs[rtype].RestartPolicy = kubeflowv1.RestartPolicyExitCode - } - - basicLabels := reconciler.GenLabels(tfJob.GetName()) - ctx := context.Background() - - Expect(rtype).Should(BeElementOf([]kubeflowv1.ReplicaType{ - kubeflowv1.TFJobReplicaTypeWorker, - kubeflowv1.TFJobReplicaTypePS, - kubeflowv1.TFJobReplicaTypeChief, - })) - - refs := []metav1.OwnerReference{ - *reconciler.GenOwnerReference(tfJob), - } - - var i int32 - index := 0 - for i = 0; i < succeeded; i++ { - pod := tftestutil.NewPod(tfJob, rtype, index, refs) - for k, v := range 
basicLabels { - pod.Labels[k] = v - } - po := &corev1.Pod{} - Expect(client.Create(ctx, pod)).Should(Succeed()) - - key := genKeyFromJob(pod) - Eventually(func() error { - po = &corev1.Pod{} - if err := client.Get(ctx, key, po); err != nil { - return err - } - - po.Status.Phase = corev1.PodSucceeded - if worker0Completed == true && rtype == kubeflowv1.TFJobReplicaTypeWorker && index == 0 { - po.Status.ContainerStatuses = []corev1.ContainerStatus{ - { - Name: reconciler.GetDefaultContainerName(), - State: corev1.ContainerState{ - Terminated: &corev1.ContainerStateTerminated{ - ExitCode: int32(0), // exit with 0 - }, - }, - }, - } - } - - return client.Status().Update(ctx, po) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - updateJobReplicaStatuses(&tfJob.Status, rtype, po) - - index++ - } - - for i = 0; i < failed; i++ { - pod := tftestutil.NewPod(tfJob, rtype, index, refs) - for k, v := range basicLabels { - pod.Labels[k] = v - } - po := &corev1.Pod{} - Expect(client.Create(ctx, pod)).Should(Succeed()) - - key := genKeyFromJob(pod) - Eventually(func() error { - po = &corev1.Pod{} - if err := client.Get(ctx, key, po); err != nil { - return err - } - - po.Status.Phase = corev1.PodFailed - if restart == true { - if po.Status.ContainerStatuses == nil { - po.Status.ContainerStatuses = []corev1.ContainerStatus{ - { - Name: reconciler.GetDefaultContainerName(), - State: corev1.ContainerState{ - Terminated: &corev1.ContainerStateTerminated{ - ExitCode: int32(130), // 130 is a retryable code - }, - }, - }, - } - } - } - - return client.Status().Update(ctx, po) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - updateJobReplicaStatuses(&tfJob.Status, rtype, po) - index++ - } - - for i = 0; i < active; i++ { - pod := tftestutil.NewPod(tfJob, rtype, index, refs) - for k, v := range basicLabels { - pod.Labels[k] = v - } - po := &corev1.Pod{} - Expect(client.Create(ctx, pod)).Should(Succeed()) - - key := genKeyFromJob(pod) - Eventually(func() error { - 
po = &corev1.Pod{} - if err := client.Get(ctx, key, po); err != nil { - return err - } - - po.Status.Phase = corev1.PodRunning - - return client.Status().Update(ctx, po) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - updateJobReplicaStatuses(&tfJob.Status, rtype, po) - index++ - } -} - -func genKeyFromJob(job client.Object) types.NamespacedName { - ns := metav1.NamespaceDefault - if job.GetNamespace() != "" { - ns = job.GetNamespace() - } - return types.NamespacedName{ - Namespace: ns, - Name: job.GetName(), - } -} - -func filterOutConditionTest(status kubeflowv1.JobStatus) error { - flag := util.IsFailed(status) || util.IsSucceeded(status) - for _, condition := range status.Conditions { - if flag && condition.Type == kubeflowv1.JobRunning && condition.Status == corev1.ConditionTrue { - return fmt.Errorf("error condition status when succeeded or failed") - } - } - return nil -} diff --git a/pkg/controller.v1/tensorflow/suite_test.go b/pkg/controller.v1/tensorflow/suite_test.go deleted file mode 100644 index c1824fea5c..0000000000 --- a/pkg/controller.v1/tensorflow/suite_test.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tensorflow - -import ( - "context" - "crypto/tls" - "fmt" - "net" - "path/filepath" - "testing" - "time" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - "github.com/kubeflow/training-operator/pkg/util/testutil" - tensorflowwebhook "github.com/kubeflow/training-operator/pkg/webhooks/tensorflow" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/client-go/kubernetes/scheme" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" - //+kubebuilder:scaffold:imports -) - -// These tests use Ginkgo (BDD-style Go testing framework). Refer to -// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
- -var ( - testK8sClient client.Client - testEnv *envtest.Environment - testCtx context.Context - testCancel context.CancelFunc - reconciler *TFJobReconciler -) - -func TestAPIs(t *testing.T) { - RegisterFailHandler(Fail) - - RunSpecs(t, "Controller Suite") -} - -var _ = BeforeSuite(func() { - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - - testCtx, testCancel = context.WithCancel(context.TODO()) - - By("bootstrapping test environment") - testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "manifests", "base", "crds")}, - ErrorIfCRDPathMissing: true, - WebhookInstallOptions: envtest.WebhookInstallOptions{ - Paths: []string{filepath.Join("..", "..", "..", "manifests", "base", "webhook", "manifests.yaml")}, - }, - } - - cfg, err := testEnv.Start() - Expect(err).NotTo(HaveOccurred()) - Expect(cfg).NotTo(BeNil()) - - err = v1beta1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - err = kubeflowv1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - //+kubebuilder:scaffold:scheme - - testK8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) - Expect(err).NotTo(HaveOccurred()) - Expect(testK8sClient).NotTo(BeNil()) - - mgr, err := ctrl.NewManager(cfg, ctrl.Options{ - Metrics: metricsserver.Options{ - BindAddress: "0", - }, - WebhookServer: webhook.NewServer( - webhook.Options{ - Host: testEnv.WebhookInstallOptions.LocalServingHost, - Port: testEnv.WebhookInstallOptions.LocalServingPort, - CertDir: testEnv.WebhookInstallOptions.LocalServingCertDir, - }), - }) - Expect(err).NotTo(HaveOccurred()) - - gangSchedulingSetupFunc := common.GenNonGangSchedulerSetupFunc() - reconciler = NewReconciler(mgr, gangSchedulingSetupFunc) - Expect(reconciler.SetupWithManager(mgr, 1)).NotTo(HaveOccurred()) - Expect(tensorflowwebhook.SetupWebhook(mgr)).NotTo(HaveOccurred()) - - go func() { - defer GinkgoRecover() - err = mgr.Start(testCtx) - Expect(err).ToNot(HaveOccurred(), 
"failed to run manager") - }() - - dialer := &net.Dialer{Timeout: time.Second} - addrPort := fmt.Sprintf("%s:%d", testEnv.WebhookInstallOptions.LocalServingHost, testEnv.WebhookInstallOptions.LocalServingPort) - Eventually(func(g Gomega) { - conn, err := tls.DialWithDialer(dialer, "tcp", addrPort, &tls.Config{InsecureSkipVerify: true}) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(conn.Close()).NotTo(HaveOccurred()) - }).Should(Succeed()) - - // This step is introduced to make sure cache starts before running any tests - Eventually(func() error { - nsList := &corev1.NamespaceList{} - if err := testK8sClient.List(context.Background(), nsList); err != nil { - return err - } else if len(nsList.Items) < 1 { - return fmt.Errorf("cannot get at lease one namespace, got %d", len(nsList.Items)) - } - return nil - }, testutil.Timeout, testutil.Interval).Should(BeNil()) -}) - -var _ = AfterSuite(func() { - By("tearing down the test environment") - testCancel() - // Give 5 seconds to stop all tests - time.Sleep(5 * time.Second) - err := testEnv.Stop() - Expect(err).NotTo(HaveOccurred()) -}) diff --git a/pkg/controller.v1/tensorflow/tensorflow.go b/pkg/controller.v1/tensorflow/tensorflow.go deleted file mode 100644 index 4942dd2b3e..0000000000 --- a/pkg/controller.v1/tensorflow/tensorflow.go +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Package controller provides a Kubernetes controller for a TFJob resource. -package tensorflow - -import ( - "encoding/json" - "fmt" - "os" - "strconv" - "strings" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" -) - -const ( - // EnvCustomClusterDomain is the custom defined cluster domain, such as "svc.cluster.local". - // Ref: https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#a-records - EnvCustomClusterDomain = "CUSTOM_CLUSTER_DOMAIN" -) - -// TaskSpec is the specification for a task (PS or worker) of the TFJob. -type TaskSpec struct { - Type string `json:"type"` - Index int `json:"index"` -} - -// ClusterSpec represents a cluster TensorFlow specification. -// https://www.tensorflow.org/deploy/distributed#create_a_tftrainclusterspec_to_describe_the_cluster -// It is a map from job names to network addresses. -type ClusterSpec map[string][]string - -// TFConfig is a struct representing the distributed TensorFlow config. -// This struct is turned into an environment variable TF_CONFIG -// which is used by TensorFlow processes to configure themselves. -// https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig#methods -// https://cloud.google.com/ml-engine/docs/tensorflow/distributed-training-details -type TFConfig struct { - // Cluster represents a TensorFlow ClusterSpec. - // See: https://www.tensorflow.org/api_docs/python/tf/train/ClusterSpec - Cluster ClusterSpec `json:"cluster"` - Task TaskSpec `json:"task"` - // Environment is used by tensorflow.contrib.learn.python.learn in versions <= 1.3 - // TODO(jlewi): I don't think it is used in versions TF >- 1.4. So we can eventually get rid of it. - Environment string `json:"environment"` -} - -// SparseClusterSpec enables a server to be configured without needing to know -// the identity of (for example) all other worker tasks. 
-// https://www.tensorflow.org/api_docs/python/tf/train/ClusterSpec -type SparseClusterSpec struct { - Worker map[int32]string `json:"worker"` - PS []string `json:"ps"` -} - -type SparseTFConfig struct { - Cluster SparseClusterSpec `json:"cluster"` - Task TaskSpec `json:"task"` -} - -func convertClusterSpecToSparseClusterSpec(clusterSpec ClusterSpec, rtype string, index int32) SparseClusterSpec { - sparseClusterSpec := SparseClusterSpec{Worker: map[int32]string{}, PS: []string{}} - if rtype == strings.ToLower(string(kubeflowv1.TFJobReplicaTypePS)) { - sparseClusterSpec.PS = append(sparseClusterSpec.PS, clusterSpec[rtype][index]) - } else if rtype == strings.ToLower(string(kubeflowv1.TFJobReplicaTypeWorker)) { - sparseClusterSpec.PS = clusterSpec[strings.ToLower(string(kubeflowv1.TFJobReplicaTypePS))] - sparseClusterSpec.Worker[index] = clusterSpec[rtype][index] - } - return sparseClusterSpec -} - -// genTFConfig will generate the environment variable TF_CONFIG -// -// { -// "cluster": { -// "ps": ["ps1:2222", "ps2:2222"], -// "worker": ["worker1:2222", "worker2:2222", "worker3:2222"] -// }, -// "task": { -// "type": "ps", -// "index": 1 -// }, -// } -// } -// -// if EnableDynamicWorker set true -// -// { -// "cluster": { -// "ps": ["ps1:2222", "ps2:2222"], -// "worker": {"1":"worker1:2222"} -// }, -// "task": { -// "type": "worker", -// "index": 1 -// }, -// } -// } -func genTFConfigJSONStr(tfjob *kubeflowv1.TFJob, rtype, index string) (string, error) { - // Configure the TFCONFIG environment variable. 
- i, err := strconv.ParseInt(index, 0, 32) - if err != nil { - return "", err - } - - cluster, err := genClusterSpec(tfjob) - if err != nil { - return "", err - } - - var tfConfigJSONByteSlice []byte - if tfjob.Spec.EnableDynamicWorker { - sparseCluster := convertClusterSpecToSparseClusterSpec(cluster, strings.ToLower(rtype), int32(i)) - sparseTFConfig := SparseTFConfig{ - Cluster: sparseCluster, - Task: TaskSpec{ - Type: strings.ToLower(rtype), - Index: int(i), - }, - } - tfConfigJSONByteSlice, err = json.Marshal(sparseTFConfig) - } else { - tfConfig := TFConfig{ - Cluster: cluster, - Task: TaskSpec{ - Type: strings.ToLower(rtype), - Index: int(i), - }, - // We need to set environment to cloud otherwise it will default to local which isn't what we want. - // Environment is used by tensorflow.contrib.learn.python.learn in versions <= 1.3 - // TODO(jlewi): I don't think it is used in versions TF >- 1.4. So we can eventually get rid of it. - Environment: "cloud", - } - tfConfigJSONByteSlice, err = json.Marshal(tfConfig) - } - if err != nil { - return "", err - } - - return string(tfConfigJSONByteSlice), nil -} - -// genClusterSpec will generate ClusterSpec. -func genClusterSpec(tfjob *kubeflowv1.TFJob) (ClusterSpec, error) { - clusterSpec := make(ClusterSpec) - - for rtype, spec := range tfjob.Spec.TFReplicaSpecs { - rt := strings.ToLower(string(rtype)) - replicaNames := make([]string, 0, *spec.Replicas) - - port, err := GetPortFromTFJob(tfjob, rtype) - if err != nil { - return nil, err - } - for i := int32(0); i < *spec.Replicas; i++ { - // As described here: https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#a-records. - // Headless service assigned a DNS A record for a name of the form "my-svc.my-namespace.svc.cluster.local". - // And the last part "svc.cluster.local" is called cluster domain - // which maybe different between kubernetes clusters. 
- hostName := common.GenGeneralName(tfjob.Name, rt, fmt.Sprintf("%d", i)) - svcName := hostName + "." + tfjob.Namespace + "." + "svc" - clusterDomain := os.Getenv(EnvCustomClusterDomain) - if len(clusterDomain) > 0 { - svcName += "." + clusterDomain - } - - endpoint := fmt.Sprintf("%s:%d", svcName, port) - replicaNames = append(replicaNames, endpoint) - } - - clusterSpec[rt] = replicaNames - } - - return clusterSpec, nil -} diff --git a/pkg/controller.v1/tensorflow/tensorflow_test.go b/pkg/controller.v1/tensorflow/tensorflow_test.go deleted file mode 100644 index ce059524d8..0000000000 --- a/pkg/controller.v1/tensorflow/tensorflow_test.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2020 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package controller provides a Kubernetes controller for a TFJob resource. 
-package tensorflow - -import ( - "reflect" - "testing" -) - -func TestConvertClusterSpecToSparseClusterSpec(t *testing.T) { - clusterSpec := ClusterSpec{ - "ps": {"test-tfjob-ps-0.default.svc:2222", "test-tfjob-ps-1.default.svc:2222"}, - "worker": {"test-tfjob-worker-0.default.svc:2222", "test-tfjob-worker-1.default.svc:2222"}, - } - workerSparseClusterSpec := convertClusterSpecToSparseClusterSpec(clusterSpec, "worker", 0) - psSparseClusterSpec := convertClusterSpecToSparseClusterSpec(clusterSpec, "ps", 0) - - expectedWorkerSparseClusterSpec := SparseClusterSpec{ - Worker: map[int32]string{0: "test-tfjob-worker-0.default.svc:2222"}, - PS: []string{"test-tfjob-ps-0.default.svc:2222", "test-tfjob-ps-1.default.svc:2222"}, - } - expectedPSSparseClusterSpec := SparseClusterSpec{ - Worker: map[int32]string{}, - PS: []string{"test-tfjob-ps-0.default.svc:2222"}, - } - if !reflect.DeepEqual(workerSparseClusterSpec, expectedWorkerSparseClusterSpec) { - t.Error("sparseClusterSpec for worker is not correct!") - } - if !reflect.DeepEqual(psSparseClusterSpec, expectedPSSparseClusterSpec) { - t.Error("sparseClusterSpec for worker is not correct!") - } -} diff --git a/pkg/controller.v1/tensorflow/testutil/pod.go b/pkg/controller.v1/tensorflow/testutil/pod.go deleted file mode 100644 index da6562d76e..0000000000 --- a/pkg/controller.v1/tensorflow/testutil/pod.go +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package testutil - -import ( - "context" - "fmt" - "strings" - - . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -const ( - DummyContainerName = "dummy" - DummyContainerImage = "dummy/dummy:latest" -) - -func NewBasePod(name string, job metav1.Object, refs []metav1.OwnerReference) *corev1.Pod { - - return &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Labels: map[string]string{}, - Namespace: job.GetNamespace(), - OwnerReferences: refs, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: DummyContainerName, - Image: DummyContainerImage, - }, - }, - }, - } -} - -func NewPod(job metav1.Object, typ kubeflowv1.ReplicaType, index int, refs []metav1.OwnerReference) *corev1.Pod { - pod := NewBasePod(fmt.Sprintf("%s-%s-%d", job.GetName(), strings.ToLower(string(typ)), index), job, refs) - pod.Labels[kubeflowv1.ReplicaTypeLabel] = strings.ToLower(string(typ)) - pod.Labels[kubeflowv1.ReplicaIndexLabel] = fmt.Sprintf("%d", index) - return pod -} - -// NewPodList create count pods with the given phase for the given tfJob -func NewPodList(count int32, status corev1.PodPhase, job metav1.Object, typ kubeflowv1.ReplicaType, start int32, refs []metav1.OwnerReference) []*corev1.Pod { - pods := []*corev1.Pod{} - for i := int32(0); i < count; i++ { - newPod := NewPod(job, typ, int(start+i), refs) - newPod.Status = corev1.PodStatus{Phase: status} - pods = append(pods, newPod) - } - return pods -} - -func SetPodsStatuses(client client.Client, job metav1.Object, typ kubeflowv1.ReplicaType, - pendingPods, activePods, succeededPods, failedPods int32, restartCounts 
[]int32, - refs []metav1.OwnerReference, basicLabels map[string]string) { - var index int32 - taskMap := map[corev1.PodPhase]int32{ - corev1.PodFailed: failedPods, - corev1.PodPending: pendingPods, - corev1.PodRunning: activePods, - corev1.PodSucceeded: succeededPods, - } - ctx := context.Background() - - for podPhase, desiredCount := range taskMap { - for i, pod := range NewPodList(desiredCount, podPhase, job, typ, index, refs) { - for k, v := range basicLabels { - pod.Labels[k] = v - } - _ = client.Create(ctx, pod) - launcherKey := types.NamespacedName{ - Namespace: metav1.NamespaceDefault, - Name: pod.GetName(), - } - Eventually(func() error { - po := &corev1.Pod{} - if err := client.Get(ctx, launcherKey, po); err != nil { - return err - } - po.Status.Phase = podPhase - if podPhase == corev1.PodRunning && restartCounts != nil { - po.Status.ContainerStatuses = []corev1.ContainerStatus{{RestartCount: restartCounts[i]}} - } - return client.Status().Update(ctx, po) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - } - index += desiredCount - } -} diff --git a/pkg/controller.v1/tensorflow/testutil/service.go b/pkg/controller.v1/tensorflow/testutil/service.go deleted file mode 100644 index 00e01c628a..0000000000 --- a/pkg/controller.v1/tensorflow/testutil/service.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package testutil - -import ( - "context" - "fmt" - "strings" - - . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -const ( - DummyPortName string = "dummy" - DummyPort int32 = 1221 -) - -func NewBaseService(name string, job metav1.Object, refs []metav1.OwnerReference) *corev1.Service { - return &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Labels: map[string]string{}, - Namespace: job.GetNamespace(), - OwnerReferences: refs, - }, - Spec: corev1.ServiceSpec{ - Ports: []corev1.ServicePort{ - { - Name: DummyPortName, - Port: DummyPort, - }, - }, - }, - } -} - -func NewService(job metav1.Object, typ kubeflowv1.ReplicaType, index int, refs []metav1.OwnerReference) *corev1.Service { - svc := NewBaseService(fmt.Sprintf("%s-%s-%d", job.GetName(), strings.ToLower(string(typ)), index), job, refs) - svc.Labels[kubeflowv1.ReplicaTypeLabel] = strings.ToLower(string(typ)) - svc.Labels[kubeflowv1.ReplicaIndexLabel] = fmt.Sprintf("%d", index) - return svc -} - -// NewServiceList creates count pods with the given phase for the given tfJob -func NewServiceList(count int32, job metav1.Object, typ kubeflowv1.ReplicaType, refs []metav1.OwnerReference) []*corev1.Service { - services := []*corev1.Service{} - for i := int32(0); i < count; i++ { - newService := NewService(job, typ, int(i), refs) - services = append(services, newService) - } - return services -} - -func SetServices(client client.Client, job metav1.Object, typ kubeflowv1.ReplicaType, activeWorkerServices int32, - refs []metav1.OwnerReference, basicLabels map[string]string) { - ctx := context.Background() - for _, svc := range NewServiceList(activeWorkerServices, job, typ, refs) { - for k, v := range basicLabels { - svc.Labels[k] = v - } - err := client.Create(ctx, svc) - if 
errors.IsAlreadyExists(err) { - return - } else { - Expect(err).To(BeNil()) - } - } -} diff --git a/pkg/controller.v1/tensorflow/testutil/tfjob.go b/pkg/controller.v1/tensorflow/testutil/tfjob.go deleted file mode 100644 index 25da80d8b8..0000000000 --- a/pkg/controller.v1/tensorflow/testutil/tfjob.go +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package testutil - -import ( - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -const TestTFJobName = "test-tfjob" - -func NewTFJobWithCleanPolicy(chief, worker, ps int, policy kubeflowv1.CleanPodPolicy) *kubeflowv1.TFJob { - if chief == 1 { - tfJob := NewTFJobWithChief(worker, ps) - tfJob.Spec.RunPolicy.CleanPodPolicy = &policy - return tfJob - } - tfJob := NewTFJob(worker, ps) - tfJob.Spec.RunPolicy.CleanPodPolicy = &policy - return tfJob -} - -func NewTFJobWithCleanupJobDelay(chief, worker, ps int, ttl *int32) *kubeflowv1.TFJob { - if chief == 1 { - tfJob := NewTFJobWithChief(worker, ps) - tfJob.Spec.RunPolicy.TTLSecondsAfterFinished = ttl - tfJob.Spec.RunPolicy.CleanPodPolicy = kubeflowv1.CleanPodPolicyPointer(kubeflowv1.CleanPodPolicyNone) - return tfJob - } - tfJob := NewTFJob(worker, ps) - tfJob.Spec.RunPolicy.TTLSecondsAfterFinished = ttl - tfJob.Spec.RunPolicy.CleanPodPolicy = 
kubeflowv1.CleanPodPolicyPointer(kubeflowv1.CleanPodPolicyNone) - return tfJob -} - -func NewTFJobWithActiveDeadlineSeconds(chief, worker, ps int, ads *int64) *kubeflowv1.TFJob { - if chief == 1 { - tfJob := NewTFJobWithChief(worker, ps) - tfJob.Spec.RunPolicy.ActiveDeadlineSeconds = ads - tfJob.Spec.RunPolicy.CleanPodPolicy = kubeflowv1.CleanPodPolicyPointer(kubeflowv1.CleanPodPolicyAll) - return tfJob - } - tfJob := NewTFJob(worker, ps) - tfJob.Spec.RunPolicy.ActiveDeadlineSeconds = ads - tfJob.Spec.RunPolicy.CleanPodPolicy = kubeflowv1.CleanPodPolicyPointer(kubeflowv1.CleanPodPolicyAll) - return tfJob -} - -func NewTFJobWithBackoffLimit(chief, worker, ps int, backoffLimit *int32) *kubeflowv1.TFJob { - if chief == 1 { - tfJob := NewTFJobWithChief(worker, ps) - tfJob.Spec.RunPolicy.BackoffLimit = backoffLimit - tfJob.Spec.TFReplicaSpecs["Worker"].RestartPolicy = "OnFailure" - tfJob.Spec.RunPolicy.CleanPodPolicy = kubeflowv1.CleanPodPolicyPointer(kubeflowv1.CleanPodPolicyAll) - return tfJob - } - tfJob := NewTFJob(worker, ps) - tfJob.Spec.RunPolicy.BackoffLimit = backoffLimit - tfJob.Spec.TFReplicaSpecs["Worker"].RestartPolicy = "OnFailure" - tfJob.Spec.RunPolicy.CleanPodPolicy = kubeflowv1.CleanPodPolicyPointer(kubeflowv1.CleanPodPolicyAll) - return tfJob -} - -func NewTFJobWithChief(worker, ps int) *kubeflowv1.TFJob { - tfJob := NewTFJob(worker, ps) - chief := int32(1) - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeChief] = &kubeflowv1.ReplicaSpec{ - Replicas: &chief, - Template: NewTFReplicaSpecTemplate(), - } - return tfJob -} - -func NewTFJobWithEvaluator(worker, ps, evaluator int) *kubeflowv1.TFJob { - tfJob := NewTFJob(worker, ps) - if evaluator > 0 { - evaluator := int32(evaluator) - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeEval] = &kubeflowv1.ReplicaSpec{ - Replicas: &evaluator, - Template: NewTFReplicaSpecTemplate(), - } - } - return tfJob -} - -func NewTFJobWithSuccessPolicy(worker, ps int, successPolicy kubeflowv1.SuccessPolicy) 
*kubeflowv1.TFJob { - tfJob := NewTFJob(worker, ps) - tfJob.Spec.SuccessPolicy = &successPolicy - return tfJob -} - -func NewTFJob(worker, ps int) *kubeflowv1.TFJob { - tfJob := &kubeflowv1.TFJob{ - TypeMeta: metav1.TypeMeta{ - Kind: kubeflowv1.TFJobKind, - }, - ObjectMeta: metav1.ObjectMeta{ - Name: TestTFJobName, - Namespace: metav1.NamespaceDefault, - }, - Spec: kubeflowv1.TFJobSpec{ - TFReplicaSpecs: make(map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec), - }, - } - kubeflowv1.SetObjectDefaults_TFJob(tfJob) - - if worker > 0 { - worker := int32(worker) - workerReplicaSpec := &kubeflowv1.ReplicaSpec{ - Replicas: &worker, - Template: NewTFReplicaSpecTemplate(), - } - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker] = workerReplicaSpec - } - - if ps > 0 { - ps := int32(ps) - psReplicaSpec := &kubeflowv1.ReplicaSpec{ - Replicas: &ps, - Template: NewTFReplicaSpecTemplate(), - } - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypePS] = psReplicaSpec - } - return tfJob -} - -func NewTFJobV2(worker, ps, master, chief, evaluator int) *kubeflowv1.TFJob { - tfJob := &kubeflowv1.TFJob{ - TypeMeta: metav1.TypeMeta{ - Kind: kubeflowv1.TFJobKind, - }, - ObjectMeta: metav1.ObjectMeta{ - Name: TestTFJobName, - Namespace: metav1.NamespaceDefault, - }, - Spec: kubeflowv1.TFJobSpec{ - TFReplicaSpecs: make(map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec), - }, - } - kubeflowv1.SetObjectDefaults_TFJob(tfJob) - - if worker > 0 { - worker := int32(worker) - workerReplicaSpec := &kubeflowv1.ReplicaSpec{ - Replicas: &worker, - Template: NewTFReplicaSpecTemplate(), - } - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeWorker] = workerReplicaSpec - } - - if ps > 0 { - ps := int32(ps) - psReplicaSpec := &kubeflowv1.ReplicaSpec{ - Replicas: &ps, - Template: NewTFReplicaSpecTemplate(), - } - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypePS] = psReplicaSpec - } - - if master > 0 { - master := int32(master) - masterReplicaSpec := &kubeflowv1.ReplicaSpec{ - 
Replicas: &master, - Template: NewTFReplicaSpecTemplate(), - } - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeMaster] = masterReplicaSpec - } - - if chief > 0 { - chief := int32(chief) - chiefReplicaSpec := &kubeflowv1.ReplicaSpec{ - Replicas: &chief, - Template: NewTFReplicaSpecTemplate(), - } - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeChief] = chiefReplicaSpec - } - - if evaluator > 0 { - evaluator := int32(evaluator) - evaluatorReplicaSpec := &kubeflowv1.ReplicaSpec{ - Replicas: &evaluator, - Template: NewTFReplicaSpecTemplate(), - } - tfJob.Spec.TFReplicaSpecs[kubeflowv1.TFJobReplicaTypeChief] = evaluatorReplicaSpec - } - return tfJob -} - -func NewTFJobWithNamespace(worker, ps int, ns string) *kubeflowv1.TFJob { - tfJob := NewTFJob(worker, ps) - tfJob.Namespace = ns - - return tfJob -} - -func NewTFJobWithEvaluatorAndNamespace(worker, ps, evaluator int, ns string) *kubeflowv1.TFJob { - tfJob := NewTFJobWithEvaluator(worker, ps, evaluator) - tfJob.Namespace = ns - - return tfJob -} - -func NewTFReplicaSpecTemplate() v1.PodTemplateSpec { - return v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - v1.Container{ - Name: kubeflowv1.TFJobDefaultContainerName, - Image: "test-image-for-kubeflow-training-operator:latest", - Args: []string{"Fake", "Fake"}, - Ports: []v1.ContainerPort{ - v1.ContainerPort{ - Name: kubeflowv1.TFJobDefaultPortName, - ContainerPort: kubeflowv1.TFJobDefaultPort, - }, - }, - }, - }, - }, - } -} - -func CheckCondition(tfJob *kubeflowv1.TFJob, condition kubeflowv1.JobConditionType, reason string) bool { - for _, v := range tfJob.Status.Conditions { - if v.Type == condition && v.Status == v1.ConditionTrue && v.Reason == reason { - return true - } - } - return false -} diff --git a/pkg/controller.v1/tensorflow/tfjob_controller.go b/pkg/controller.v1/tensorflow/tfjob_controller.go deleted file mode 100644 index dbaf1c8a72..0000000000 --- a/pkg/controller.v1/tensorflow/tfjob_controller.go +++ /dev/null @@ 
-1,662 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tensorflow - -import ( - "context" - "fmt" - "strings" - "time" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" - "github.com/kubeflow/training-operator/pkg/common/util" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - commonutil "github.com/kubeflow/training-operator/pkg/util" - - "github.com/go-logr/logr" - "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/client-go/informers" - kubeclientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/record" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/manager" - 
"sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/source" - schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" -) - -const ( - FailedDeleteJobReason = "FailedDeleteJob" - SuccessfulDeleteJobReason = "SuccessfulDeleteJob" - - controllerName = "tfjob-controller" - - // tfConfig is the environment variable name of TensorFlow cluster spec. - tfConfig = "TF_CONFIG" -) - -func NewReconciler(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc) *TFJobReconciler { - r := &TFJobReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - recorder: mgr.GetEventRecorderFor(controllerName), - apiReader: mgr.GetAPIReader(), - Log: log.Log, - } - - cfg := mgr.GetConfig() - kubeClientSet := kubeclientset.NewForConfigOrDie(cfg) - sharedInformers := informers.NewSharedInformerFactory(kubeClientSet, 0) - priorityClassInformer := sharedInformers.Scheduling().V1().PriorityClasses() - - r.JobController = common.JobController{ - Controller: r, - Expectations: expectation.NewControllerExpectations(), - WorkQueue: &util.FakeWorkQueue[string]{}, - Recorder: r.recorder, - KubeClientSet: kubeClientSet, - PriorityClassLister: priorityClassInformer.Lister(), - PriorityClassInformerSynced: priorityClassInformer.Informer().HasSynced, - PodControl: control.RealPodControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - ServiceControl: control.RealServiceControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - } - - gangSchedulingSetupFunc(&r.JobController) - - return r -} - -// TFJobReconciler reconciles a TFJob object -type TFJobReconciler struct { - common.JobController - client.Client - Scheme *runtime.Scheme - recorder record.EventRecorder - apiReader client.Reader - Log logr.Logger -} - -// +kubebuilder:rbac:groups=kubeflow.org,resources=tfjobs,verbs=get;list;watch;create;update;patch;delete -// 
+kubebuilder:rbac:groups=kubeflow.org,resources=tfjobs/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=kubeflow.org,resources=tfjobs/finalizers,verbs=update -// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;delete -// +kubebuilder:rbac:groups=scheduling.volcano.sh,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=scheduling.x-k8s.io,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch;delete - -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -func (r *TFJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - logger := r.Log.WithValues(kubeflowv1.TFJobSingular, req.NamespacedName) - - tfjob := &kubeflowv1.TFJob{} - err := r.Get(ctx, req.NamespacedName, tfjob) - if err != nil { - logger.Info(err.Error(), "unable to fetch TFJob", req.NamespacedName.String()) - return ctrl.Result{}, client.IgnoreNotFound(err) - } - - if manager := r.ManagedByExternalController(tfjob.Spec.RunPolicy.ManagedBy); manager != nil { - logger.Info("Skipping TFJob managed by a custom controller", "managed-by", manager) - return ctrl.Result{}, nil - } - - // Check if reconciliation is needed - jobKey, err := common.KeyFunc(tfjob) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get jobKey for job object %#v: %v", tfjob, err)) - } - - replicaTypes := util.GetReplicaTypes(tfjob.Spec.TFReplicaSpecs) - needReconcile := util.SatisfiedExpectations(r.Expectations, jobKey, replicaTypes) - - if !needReconcile || tfjob.GetDeletionTimestamp() != nil { - logger.Info("reconcile cancelled, job does not need to do reconcile or has been deleted", - "sync", 
needReconcile, "deleted", tfjob.GetDeletionTimestamp() != nil) - return ctrl.Result{}, nil - } - - // Set default priorities to tfjob - r.Scheme.Default(tfjob) - - // Use common to reconcile the job related pod and service - err = r.ReconcileJobs(tfjob, tfjob.Spec.TFReplicaSpecs, tfjob.Status, &tfjob.Spec.RunPolicy) - if err != nil { - logrus.Warnf("Reconcile Tensorflow Job error %v", err) - return ctrl.Result{}, err - } - - t, err := util.DurationUntilExpireTime(&tfjob.Spec.RunPolicy, tfjob.Status) - if err != nil { - logrus.Warnf("Reconcile Tensorflow Job error %v", err) - return ctrl.Result{}, err - } - if t >= 0 { - return ctrl.Result{Requeue: true, RequeueAfter: t}, nil - } - - return ctrl.Result{}, nil -} - -// SetupWithManager sets up the controller with the Manager. -func (r *TFJobReconciler) SetupWithManager(mgr ctrl.Manager, controllerThreads int) error { - c, err := controller.New(r.ControllerName(), mgr, controller.Options{ - Reconciler: r, - MaxConcurrentReconciles: controllerThreads, - }) - if err != nil { - return err - } - // using onOwnerCreateFunc is easier to set defaults - if err = c.Watch(source.Kind[*kubeflowv1.TFJob](mgr.GetCache(), &kubeflowv1.TFJob{}, - &handler.TypedEnqueueRequestForObject[*kubeflowv1.TFJob]{}, - predicate.TypedFuncs[*kubeflowv1.TFJob]{CreateFunc: r.onOwnerCreateFunc()}), - ); err != nil { - return err - } - // inject watching for job related pod - if err = c.Watch(source.Kind[*corev1.Pod](mgr.GetCache(), &corev1.Pod{}, - handler.TypedEnqueueRequestForOwner[*corev1.Pod](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.TFJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Pod](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // inject watching for job related service - if err = c.Watch(source.Kind[*corev1.Service](mgr.GetCache(), &corev1.Service{}, - handler.TypedEnqueueRequestForOwner[*corev1.Service](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.TFJob{}, 
handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Service](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // skip watching volcano PodGroup if volcano PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: v1beta1.GroupName, Kind: "PodGroup"}, - v1beta1.SchemeGroupVersion.Version); err == nil { - // inject watching for job related volcano PodGroup - if err = c.Watch(source.Kind[*v1beta1.PodGroup](mgr.GetCache(), &v1beta1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*v1beta1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.TFJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*v1beta1.PodGroup](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - } - // skip watching scheduler-plugins PodGroup if scheduler-plugins PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: schedulerpluginsv1alpha1.SchemeGroupVersion.Group, Kind: "PodGroup"}, - schedulerpluginsv1alpha1.SchemeGroupVersion.Version); err == nil { - // inject watching for job related scheduler-plugins PodGroup - if err = c.Watch(source.Kind[*schedulerpluginsv1alpha1.PodGroup](mgr.GetCache(), &schedulerpluginsv1alpha1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*schedulerpluginsv1alpha1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.TFJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*schedulerpluginsv1alpha1.PodGroup](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - } - return nil -} - -func (r *TFJobReconciler) ControllerName() string { - return controllerName -} - -func (r *TFJobReconciler) GetAPIGroupVersionKind() schema.GroupVersionKind { - return kubeflowv1.GroupVersion.WithKind(kubeflowv1.TFJobKind) -} - -func (r *TFJobReconciler) GetAPIGroupVersion() schema.GroupVersion { - return kubeflowv1.GroupVersion -} - -func (r *TFJobReconciler) GetGroupNameLabelValue() 
string { - return kubeflowv1.GroupVersion.Group -} - -func (r *TFJobReconciler) GetFrameworkName() string { - return kubeflowv1.TFJobFrameworkName -} - -func (r *TFJobReconciler) GetJobFromInformerCache(namespace, name string) (metav1.Object, error) { - tfjob := &kubeflowv1.TFJob{} - err := r.Get(context.Background(), types.NamespacedName{ - Namespace: namespace, Name: name, - }, tfjob) - return tfjob, err -} - -func (r *TFJobReconciler) GetJobFromAPIClient(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.TFJob{} - - err := r.apiReader.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if errors.IsNotFound(err) { - logrus.Error(err, "tensorflow job not found", "namespace", namespace, "name", name) - } else { - logrus.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -// GetPodsForJob returns the set of pods that this job should manage. -// It also reconciles ControllerRef by adopting/orphaning. -// Note that the returned Pods are pointers into the cache. -func (r *TFJobReconciler) GetPodsForJob(jobObject interface{}) ([]*corev1.Pod, error) { - job, ok := jobObject.(metav1.Object) - if !ok { - return nil, fmt.Errorf("job is not of type metav1.Object") - } - - // Create selector. - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: r.GenLabels(job.GetName()), - }) - - if err != nil { - return nil, fmt.Errorf("couldn't convert Job selector: %v", err) - } - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. 
- podlist := &corev1.PodList{} - err = r.List(context.Background(), podlist, - client.MatchingLabelsSelector{Selector: selector}, client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - pods := util.JobControlledPodList(podlist.Items, job) - - // If any adoptions are attempted, we should first recheck for deletion - // with an uncached quorum read sometime after listing Pods (see #42639). - canAdoptFunc := common.RecheckDeletionTimestamp(func() (metav1.Object, error) { - fresh, err := r.Controller.GetJobFromAPIClient(job.GetNamespace(), job.GetName()) - if err != nil { - return nil, err - } - if fresh.GetUID() != job.GetUID() { - return nil, fmt.Errorf("original Job %v/%v is gone: got uid %v, wanted %v", job.GetNamespace(), job.GetName(), fresh.GetUID(), job.GetUID()) - } - return fresh, nil - }) - cm := control.NewPodControllerRefManager(r.PodControl, job, selector, r.Controller.GetAPIGroupVersionKind(), canAdoptFunc) - return cm.ClaimPods(pods) -} - -// GetServicesForJob returns the set of services that this job should manage. -// It also reconciles ControllerRef by adopting/orphaning. -// Note that the returned services are pointers into the cache. -func (r *TFJobReconciler) GetServicesForJob(jobObject interface{}) ([]*corev1.Service, error) { - job, ok := jobObject.(metav1.Object) - if !ok { - return nil, fmt.Errorf("job is not of type metav1.Object") - } - - // Create selector - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ - MatchLabels: r.GenLabels(job.GetName()), - }) - - if err != nil { - return nil, fmt.Errorf("couldn't convert Job selector: %v", err) - } - // List all services to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. 
- svclist := &corev1.ServiceList{} - err = r.List(context.Background(), svclist, - client.MatchingLabelsSelector{Selector: selector}, client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, fmt.Errorf("couldn't get Service: %v", err) - } - - // If any adoptions are attempted, we should first recheck for deletion - // with an uncached quorum read sometime after listing services (see #42639). - canAdoptFunc := common.RecheckDeletionTimestamp(func() (metav1.Object, error) { - fresh, err := r.GetJobFromInformerCache(job.GetNamespace(), job.GetName()) - if err != nil { - return nil, err - } - if fresh.GetUID() != job.GetUID() { - return nil, fmt.Errorf("original Job %v/%v is gone: got uid %v, wanted %v", job.GetNamespace(), job.GetName(), fresh.GetUID(), job.GetUID()) - } - return fresh, nil - }) - cm := control.NewServiceControllerRefManager(r.ServiceControl, job, selector, r.Controller.GetAPIGroupVersionKind(), canAdoptFunc) - - services := util.ConvertServiceList(svclist.Items) - return cm.ClaimServices(services) -} - -func (r *TFJobReconciler) DeleteJob(job interface{}) error { - tfJob, ok := job.(*kubeflowv1.TFJob) - if !ok { - return fmt.Errorf("%v is not a type of TFJob", tfJob) - } - - log := commonutil.LoggerForJob(tfJob) - if err := r.Delete(context.Background(), tfJob); err != nil { - r.recorder.Eventf(tfJob, v1.EventTypeWarning, FailedDeleteJobReason, "Error deleting: %v", err) - log.Errorf("failed to delete job %s/%s, %v", tfJob.Namespace, tfJob.Name, err) - return err - } - - r.recorder.Eventf(tfJob, v1.EventTypeNormal, SuccessfulDeleteJobReason, "Deleted job: %v", tfJob.Name) - log.Infof("job %s/%s has been deleted", tfJob.Namespace, tfJob.Name) - trainingoperatorcommon.DeletedJobsCounterInc(tfJob.Namespace, r.GetFrameworkName()) - return nil -} - -func (r *TFJobReconciler) UpdateJobStatus(job interface{}, replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, jobStatus *kubeflowv1.JobStatus) error { - tfJob, ok := 
job.(*kubeflowv1.TFJob) - if !ok { - return fmt.Errorf("%v is not a type of TFJob", tfJob) - } - - tfJobKey, err := common.KeyFunc(tfJob) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get key for tfjob object %#v: %v", tfJob, err)) - return err - } - - logger := commonutil.LoggerForJob(tfJob) - - worker0Completed, err := r.IsWorker0Completed(tfJob, replicas) - if err != nil { - logger.Warnf("check if worker 0 completed error %v", err) - return err - } - - // Set StartTime. - if jobStatus.StartTime == nil { - now := metav1.Now() - jobStatus.StartTime = &now - // enqueue a sync to check if job past ActiveDeadlineSeconds - if tfJob.Spec.RunPolicy.ActiveDeadlineSeconds != nil { - logger.Infof("Job with ActiveDeadlineSeconds will sync after %d seconds", *tfJob.Spec.RunPolicy.ActiveDeadlineSeconds) - // TODO(Jeffwan): requeue job key in reconciler scenarios - r.WorkQueue.AddAfter(tfJobKey, time.Duration(*tfJob.Spec.RunPolicy.ActiveDeadlineSeconds)*time.Second) - } - } - - // For the situation that jobStatus has a restarting condition, and append a running condition, - // the restarting condition will be removed from jobStatus by kubeflowv1.filterOutCondition(), - // so we need to record the existing restarting condition for later use. 
- var existingRestartingCondition *kubeflowv1.JobCondition - for _, condition := range jobStatus.Conditions { - if condition.Type == kubeflowv1.JobRestarting { - existingRestartingCondition = &kubeflowv1.JobCondition{ - Reason: condition.Reason, - Message: condition.Message, - } - } - } - - // iterate the replica spec based on this order - allTypes := []kubeflowv1.ReplicaType{ - kubeflowv1.TFJobReplicaTypeChief, - kubeflowv1.TFJobReplicaTypeEval, - kubeflowv1.TFJobReplicaTypeMaster, - kubeflowv1.TFJobReplicaTypePS, - kubeflowv1.TFJobReplicaTypeWorker, - } - for _, rtype := range allTypes { - if replicas[rtype] == nil { - continue - } - spec := replicas[rtype] - status := jobStatus.ReplicaStatuses[rtype] - - // Expect to have `replicas - succeeded` pods alive. - succeeded := status.Succeeded - expected := *(spec.Replicas) - succeeded - running := status.Active - failed := status.Failed - - logger.Infof("TFJob=%s/%s, ReplicaType=%s expected=%d, running=%d, failed=%d", - tfJob.Namespace, tfJob.Name, rtype, expected, running, failed) - - // If the TFJob contains Chief or Master spec, then we will update the status - // according to the Chief/Master spec. 
- if ContainsChiefOrMasterSpec(tfJob.Spec.TFReplicaSpecs) { - if kubeflowv1.IsChiefOrMaster(rtype) { - if running > 0 { - msg := fmt.Sprintf("TFJob %s/%s is running.", tfJob.Namespace, tfJob.Name) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobRunningReason), msg) - } - if expected == 0 { - msg := fmt.Sprintf("TFJob %s/%s successfully completed.", - tfJob.Namespace, tfJob.Name) - r.recorder.Event(tfJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobSucceededReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobSucceededReason), msg) - trainingoperatorcommon.SuccessfulJobsCounterInc(tfJob.Namespace, r.GetFrameworkName()) - } - } - } else { - if rtype == kubeflowv1.TFJobReplicaTypeWorker { - // Leave a succeeded condition for the following two cases: - // 1. If default success policy is used and worker 0 has completed. - // 2. If `SuccessPolicyAllWorkers` success policy is used and all workers are succeeded. 
- if expected == 0 || (worker0Completed && *tfJob.Spec.SuccessPolicy != kubeflowv1.SuccessPolicyAllWorkers) { - msg := fmt.Sprintf("TFJob %s/%s successfully completed.", - tfJob.Namespace, tfJob.Name) - r.recorder.Event(tfJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobSucceededReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobSucceededReason), msg) - trainingoperatorcommon.SuccessfulJobsCounterInc(tfJob.Namespace, r.GetFrameworkName()) - } else if running > 0 { - // Some workers are still running, leave a running condition. - msg := fmt.Sprintf("TFJob %s/%s is running.", tfJob.Namespace, tfJob.Name) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobRunningReason), msg) - } - } - } - - if failed > 0 { - // For the situation that jobStatus has a restarting condition, and appends a new running condition, - // the restarting condition will be removed from jobStatus by kubeflowv1.filterOutCondition(), - // so we need to append the restarting condition back to jobStatus. 
- if existingRestartingCondition != nil { - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRestarting, corev1.ConditionTrue, existingRestartingCondition.Reason, existingRestartingCondition.Message) - // job is restarting, no need to set it failed - // we know it because we update the status condition when reconciling the replicas - trainingoperatorcommon.RestartedJobsCounterInc(tfJob.Namespace, r.GetFrameworkName()) - } else { - if tfJob.Spec.EnableDynamicWorker && rtype == kubeflowv1.TFJobReplicaTypeWorker { - commonutil.LoggerForJob(tfJob).Infof("TFJob %s/%s continues regardless %d Worker replica(s) failed as enableDynamicWorker is set true.", - tfJob.Namespace, tfJob.Name, failed) - continue - } - msg := fmt.Sprintf("TFJob %s/%s has failed because %d %s replica(s) failed.", - tfJob.Namespace, tfJob.Name, failed, rtype) - r.recorder.Event(tfJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobFailedReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobFailed, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobFailedReason), msg) - trainingoperatorcommon.FailedJobsCounterInc(tfJob.Namespace, r.GetFrameworkName()) - } - } - } - // we assign the jobStatus to the tfJob.Status for testing purpose - // it won't effect the main reconcile logic - // because we already use oldStatus := jobStatus.DeepCopy() to record the oldStatus - // and use !reflect.DeepEqual(*oldStatus, jobStatus) to decide whether to update the tfJob or not - tfJob.Status = *jobStatus.DeepCopy() - - return nil -} - -func (r *TFJobReconciler) UpdateJobStatusInApiServer(job interface{}, jobStatus *kubeflowv1.JobStatus) error { - if jobStatus.ReplicaStatuses == nil { - jobStatus.ReplicaStatuses = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus{} - } - - tfJob, ok := job.(*kubeflowv1.TFJob) - if !ok { - return 
fmt.Errorf("%v is not a type of TFJob", tfJob) - } - - startTime := time.Now() - logger := commonutil.LoggerForJob(tfJob) - defer func() { - logger.Infof("Finished updating TFJobs Status %q (%v)", - tfJob.Name, time.Since(startTime)) - }() - - tfJob = tfJob.DeepCopy() - tfJob.Status = *jobStatus.DeepCopy() - - result := r.Status().Update(context.Background(), tfJob) - - if result != nil { - r.Log.WithValues("tfjob", types.NamespacedName{ - Namespace: tfJob.GetNamespace(), - Name: tfJob.GetName(), - }) - return result - } - - return nil -} - -// Same as Func (tc *TFController) SetClusterSpec(...) in pod.go -func (r *TFJobReconciler) SetClusterSpec(job interface{}, podTemplate *corev1.PodTemplateSpec, rtype, index string) error { - tfjob, ok := job.(*kubeflowv1.TFJob) - if !ok { - return fmt.Errorf("%v is not a type of TFJob", tfjob) - } - - // Do not set TF_CONFIG for local training jobs. - if !isDistributed(tfjob) { - return nil - } - // Generate TF_CONFIG JSON string. - tfConfigStr, err := genTFConfigJSONStr(tfjob, rtype, index) - if err != nil { - return err - } - - if tfConfigStr == "" { - return nil - } - // Add TF_CONFIG environment variable to tensorflow container in the pod. 
- for i := range podTemplate.Spec.Containers { - if podTemplate.Spec.Containers[i].Name == kubeflowv1.TFJobDefaultContainerName { - if len(podTemplate.Spec.Containers[i].Env) == 0 { - podTemplate.Spec.Containers[i].Env = make([]corev1.EnvVar, 0) - } - podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ - Name: tfConfig, - Value: tfConfigStr, - }) - break - } - } - return nil -} - -func (r *TFJobReconciler) GetDefaultContainerName() string { - return kubeflowv1.TFJobDefaultContainerName -} - -func (r *TFJobReconciler) GetDefaultContainerPortName() string { - return kubeflowv1.TFJobDefaultPortName -} - -func (r *TFJobReconciler) IsMasterRole(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, - rtype kubeflowv1.ReplicaType, index int) bool { - if ContainsChiefOrMasterSpec(replicas) { - return rtype == kubeflowv1.TFJobReplicaTypeChief || rtype == kubeflowv1.TFJobReplicaTypeMaster - } - // else check if it is worker with index 0 - return rtype == kubeflowv1.TFJobReplicaTypeWorker && index == 0 -} - -// IsWorker0Completed returns true if pod of worker0 succeeded and exited with 0 -func (r *TFJobReconciler) IsWorker0Completed(tfJob *kubeflowv1.TFJob, replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec) (bool, error) { - worker0Completed := false - _, ok := replicas[kubeflowv1.TFJobReplicaTypeWorker] - if !ok { - return true, nil - } - podSlices, err := r.getPodSlices(tfJob, replicas[kubeflowv1.TFJobReplicaTypeWorker].Replicas) - if err != nil { - return false, err - } - for index, podSlice := range podSlices { - if len(podSlice) == 1 { - pod := podSlice[0] - exitCode := getContainerExitCode(pod) - if index == 0 && exitCode == 0 && pod.Status.Phase == v1.PodSucceeded { - worker0Completed = true - } - } - } - return worker0Completed, nil -} - -// getPodSlices returns a slice, which element is the slice of pod. -// It gives enough information to caller to make decision to up/down scale resources. 
-func (r *TFJobReconciler) getPodSlices(tfjob *kubeflowv1.TFJob, replicasNum *int32) ([][]*v1.Pod, error) { - logger := commonutil.LoggerForReplica(tfjob, strings.ToLower(string(kubeflowv1.TFJobReplicaTypeWorker))) - - pods, err := r.GetPodsForJob(tfjob) - if err != nil { - commonutil.LoggerForJob(tfjob).Warnf("getPodsForTFJob error %v", err) - return nil, err - } - - // Get all pods for the type rt. - pods, err = r.JobController.FilterPodsForReplicaType(pods, strings.ToLower(string(kubeflowv1.TFJobReplicaTypeWorker))) - if err != nil { - return nil, err - } - - podSlices := r.GetPodSlices(pods, int(*replicasNum), logger) - return podSlices, nil -} - -// onOwnerCreateFunc modify creation condition. -func (r *TFJobReconciler) onOwnerCreateFunc() func(createEvent event.TypedCreateEvent[*kubeflowv1.TFJob]) bool { - return func(e event.TypedCreateEvent[*kubeflowv1.TFJob]) bool { - tfJob := e.Object - r.Scheme.Default(tfJob) - msg := fmt.Sprintf("TFJob %s is created.", e.Object.GetName()) - logrus.Info(msg) - trainingoperatorcommon.CreatedJobsCounterInc(tfJob.Namespace, r.GetFrameworkName()) - commonutil.UpdateJobConditions(&tfJob.Status, kubeflowv1.JobCreated, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobCreatedReason), msg) - return true - } -} diff --git a/pkg/controller.v1/tensorflow/tfjob_controller_test.go b/pkg/controller.v1/tensorflow/tfjob_controller_test.go deleted file mode 100644 index 53265e358c..0000000000 --- a/pkg/controller.v1/tensorflow/tfjob_controller_test.go +++ /dev/null @@ -1,671 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tensorflow - -import ( - "context" - "fmt" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - tftestutil "github.com/kubeflow/training-operator/pkg/controller.v1/tensorflow/testutil" - commonutil "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -var _ = Describe("TFJob controller", func() { - Context("Test Normal Path", func() { - It("should create desired Pods and Services", func() { - var ( - tfJobRunning = kubeflowv1.JobRunning - tfJobSucceeded = kubeflowv1.JobSucceeded - ) - - testCases := map[string]struct { - worker int - ps int - - // pod setup - // ControllerError error - // jobKeyForget bool - - pendingWorkerPods int32 - activeWorkerPods int32 - succeededWorkerPods int32 - failedWorkerPods int32 - - pendingPSPods int32 - activePSPods int32 - succeededPSPods int32 - failedPSPods int32 - - activeWorkerServices int32 - activePSServices int32 - - // expectations - expectedPodCreations int32 - expectedPodDeletions int32 - expectedServiceCreations int32 - - expectedActiveWorkerPods int32 - expectedSucceededWorkerPods int32 - expectedFailedWorkerPods int32 - - expectedActivePSPods int32 - expectedSucceededPSPods int32 - 
expectedFailedPSPods int32 - - expectedCondition *kubeflowv1.JobConditionType - expectedConditionReason string - - // There are some cases that should not check start time since the field should be set in the previous sync loop. - needCheckStartTime bool - }{ - "Local TFJob is created": { - 1, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, - 1, 0, 1, - 0, 0, 0, - 0, 0, 0, - // We can not check if it is created since the condition is set in addTFJob. - nil, "", - false, - }, - "Distributed TFJob (4 workers, 2 PS) is created": { - 4, 2, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, - 6, 0, 6, - 0, 0, 0, - 0, 0, 0, - nil, "", - false, - }, - "Distributed TFJob (4 workers, 2 PS) is created and all replicas are pending": { - 4, 2, - 4, 0, 0, 0, - 2, 0, 0, 0, - 4, 2, - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - nil, "", - false, - }, - "Distributed TFJob (4 workers, 2 PS) is created and all replicas are running": { - 4, 2, - 0, 4, 0, 0, - 0, 2, 0, 0, - 4, 2, - 0, 0, 0, - 4, 0, 0, - 2, 0, 0, - &tfJobRunning, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobRunningReason), - true, - }, - "Distributed TFJob (4 workers, 2 PS) is created, 2 workers, 1 PS are pending": { - 4, 2, - 2, 0, 0, 0, - 1, 0, 0, 0, - 2, 1, - 3, 0, 3, - 0, 0, 0, - 0, 0, 0, - nil, "", - false, - }, - "Distributed TFJob (4 workers, 2 PS) is created, 2 workers, 1 PS are pending, 1 worker is running": { - 4, 2, - 2, 1, 0, 0, - 1, 0, 0, 0, - 3, 1, - 2, 0, 2, - 1, 0, 0, - 0, 0, 0, - &tfJobRunning, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobRunningReason), - false, - }, - "Distributed TFJob (4 workers, 2 PS) is created, 2 workers, 1 PS are pending, 1 worker is succeeded": { - 4, 2, - 2, 0, 1, 0, - 1, 0, 0, 0, - 3, 1, - 2, 0, 2, - 0, 1, 0, - 0, 0, 0, - nil, "", - false, - }, - "Distributed TFJob (4 workers, 2 PS) is succeeded": { - 4, 2, - 0, 0, 4, 0, - 0, 0, 2, 0, - 4, 2, - 0, 0, 0, - 0, 4, 0, - 0, 2, 0, - &tfJobSucceeded, commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobSucceededReason), - false, - }, - } - 
- jobNameTemplate := "test-case-norm-%d" - caseIdx := 0 - for name, tc := range testCases { - By(name) - ctx := context.Background() - jobName := fmt.Sprintf(jobNameTemplate, caseIdx) - caseIdx++ - - tfJob := tftestutil.NewTFJob(tc.worker, tc.ps) - tfJob.SetName(jobName) - tfJob.SetUID(uuid.NewUUID()) - - refs := []metav1.OwnerReference{*reconciler.GenOwnerReference(tfJob)} - basicLabels := reconciler.GenLabels(tfJob.GetName()) - - tftestutil.SetPodsStatuses(testK8sClient, tfJob, kubeflowv1.TFJobReplicaTypeWorker, tc.pendingWorkerPods, tc.activeWorkerPods, tc.succeededWorkerPods, tc.failedWorkerPods, nil, refs, basicLabels) - tftestutil.SetPodsStatuses(testK8sClient, tfJob, kubeflowv1.TFJobReplicaTypePS, tc.pendingPSPods, tc.activePSPods, tc.succeededPSPods, tc.failedPSPods, nil, refs, basicLabels) - - tftestutil.SetServices(testK8sClient, tfJob, kubeflowv1.TFJobReplicaTypeWorker, tc.activeWorkerServices, refs, basicLabels) - tftestutil.SetServices(testK8sClient, tfJob, kubeflowv1.TFJobReplicaTypePS, tc.activePSServices, refs, basicLabels) - - totalPodNumber := int(tc.pendingWorkerPods + tc.activeWorkerPods + tc.succeededWorkerPods + tc.failedWorkerPods + tc.pendingPSPods + tc.activePSPods + tc.succeededPSPods + tc.failedPSPods) - totalServiceNumber := int(tc.activeWorkerServices + tc.activePSServices) - - selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: reconciler.GenLabels(tfJob.GetName())}) - Expect(err).Should(BeNil()) - listOpt := client.MatchingLabelsSelector{Selector: selector} - Eventually(func() error { - podList := &corev1.PodList{} - svcList := &corev1.ServiceList{} - - err = testK8sClient.List(ctx, podList, listOpt) - if err != nil { - return err - } - if len(podList.Items) != totalPodNumber { - return fmt.Errorf("expected %d Pods, got %d", totalPodNumber, len(podList.Items)) - } - - err = testK8sClient.List(ctx, svcList, listOpt) - if err != nil { - return err - } - if len(svcList.Items) != totalServiceNumber { - 
return fmt.Errorf("expected %d Services, got %d", totalServiceNumber, len(svcList.Items)) - } - return nil - }).Should(BeNil()) - - _ = reconciler.ReconcileJobs(tfJob, tfJob.Spec.TFReplicaSpecs, tfJob.Status, &tfJob.Spec.RunPolicy) - - // Check the number of Pods and Services - //var pods []*corev1.Pod = nil - //var svcs []*corev1.Service = nil - Eventually(func() error { - podList := &corev1.PodList{} - svcList := &corev1.ServiceList{} - - err = testK8sClient.List(ctx, podList, listOpt) - if err != nil { - return err - } - podCreatedNumber := 0 - if len(podList.Items) > totalPodNumber { - podCreatedNumber = len(podList.Items) - totalPodNumber - } - podDeletedNumber := 0 - if len(podList.Items) < totalPodNumber { - podDeletedNumber = totalPodNumber - len(podList.Items) - } - if podCreatedNumber != int(tc.expectedPodCreations) { - return fmt.Errorf("%s: unexpected number of pod creates. Expected %d, saw %d\n", name, tc.expectedPodCreations, podCreatedNumber) - } - if podDeletedNumber != int(tc.expectedPodDeletions) { - return fmt.Errorf("%s: unexpected number of service creates. 
Expected %d, saw %d\n", name, tc.expectedServiceCreations, podDeletedNumber) - } - // check controller references for all pods - for _, p := range podList.Items { - for _, ref := range p.GetOwnerReferences() { - if ref.APIVersion != kubeflowv1.SchemeGroupVersion.String() { - return fmt.Errorf("controllerRef.APIVersion = %q, want %q", ref.APIVersion, kubeflowv1.SchemeGroupVersion.String()) - } - if ref.Kind != kubeflowv1.TFJobKind { - return fmt.Errorf("controllerRef.MPIKind = %q, want %q", ref.Kind, kubeflowv1.TFJobKind) - } - if ref.Name != tfJob.GetName() { - return fmt.Errorf("controllerRef.Name = %q, want %q", ref.Name, tfJob.GetName()) - } - if ref.UID != tfJob.GetUID() { - return fmt.Errorf("controllerRef.UID = %q, want %q", ref.UID, tfJob.GetUID()) - } - } - } - - err = testK8sClient.List(ctx, svcList, listOpt) - if err != nil { - return err - } - serviceCreatedNumber := 0 - if len(svcList.Items) > totalServiceNumber { - serviceCreatedNumber = len(svcList.Items) - totalServiceNumber - } - if serviceCreatedNumber != int(tc.expectedServiceCreations) { - return fmt.Errorf("%s: unexpected number of pod deletes. 
Expected %d, saw %d\n", name, tc.expectedPodDeletions, serviceCreatedNumber) - } - // check controller reference for all services - for _, s := range svcList.Items { - for _, ref := range s.GetOwnerReferences() { - if ref.APIVersion != kubeflowv1.SchemeGroupVersion.String() { - return fmt.Errorf("controllerRef.APIVersion = %q, want %q", ref.APIVersion, kubeflowv1.SchemeGroupVersion.String()) - } - if ref.Kind != kubeflowv1.TFJobKind { - return fmt.Errorf("controllerRef.MPIKind = %q, want %q", ref.Kind, kubeflowv1.TFJobKind) - } - if ref.Name != tfJob.GetName() { - return fmt.Errorf("controllerRef.Name = %q, want %q", ref.Name, tfJob.GetName()) - } - if ref.UID != tfJob.GetUID() { - return fmt.Errorf("controllerRef.UID = %q, want %q", ref.UID, tfJob.GetUID()) - } - } - } - return nil - }).Should(BeNil()) - - // Validate Worker status - if tfJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeWorker] != nil { - Expect(tfJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeWorker].Active).To(Equal(tc.expectedActiveWorkerPods)) - Expect(tfJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeWorker].Succeeded).To(Equal(tc.expectedSucceededWorkerPods)) - Expect(tfJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeWorker].Failed).To(Equal(tc.expectedFailedWorkerPods)) - } - // Validate PS status - if tfJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypePS] != nil { - Expect(tfJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypePS].Active).To(Equal(tc.expectedActivePSPods)) - Expect(tfJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypePS].Succeeded).To(Equal(tc.expectedSucceededPSPods)) - Expect(tfJob.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypePS].Failed).To(Equal(tc.expectedFailedPSPods)) - } - - // Validate StartTime - if tc.needCheckStartTime { - Expect(tfJob.Status.StartTime).NotTo(BeNil()) - } - - // Validate Conditions - if tc.expectedCondition != nil { - Expect(tftestutil.CheckCondition(tfJob, *tc.expectedCondition, 
tc.expectedConditionReason)).Should(BeTrue()) - } - } - }) - }) - - Context("TFJob with suspend semantics", func() { - const name = "test-job" - var ( - ns *corev1.Namespace - job *kubeflowv1.TFJob - jobKey types.NamespacedName - chiefKey types.NamespacedName - worker0Key types.NamespacedName - ctx = context.Background() - ) - BeforeEach(func() { - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "tensorflow-test-", - }, - } - Expect(testK8sClient.Create(ctx, ns)).Should(Succeed()) - - // chief=1, worker=1 - job = tftestutil.NewTFJobV2(1, 0, 0, 1, 0) - job.SetName(name) - job.SetNamespace(ns.Name) - jobKey = client.ObjectKeyFromObject(job) - chiefKey = types.NamespacedName{ - Name: fmt.Sprintf("%s-chief-0", name), - Namespace: ns.Name, - } - worker0Key = types.NamespacedName{ - Name: fmt.Sprintf("%s-worker-0", name), - Namespace: ns.Name, - } - }) - AfterEach(func() { - Expect(testK8sClient.Delete(ctx, job)).Should(Succeed()) - Expect(testK8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - - It("Shouldn't create resources if TFJob is suspended", func() { - By("By creating a new TFJob with suspend=true") - job.Spec.RunPolicy.Suspend = ptr.To(true) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.TFJob{} - chiefPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - chiefSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - - By("Checking created TFJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - By("Checking created TFJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - errChiefPod := testK8sClient.Get(ctx, chiefKey, chiefPod) 
- errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errChiefSvc := testK8sClient.Get(ctx, chiefKey, chiefSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errChiefPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errChiefSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the TFJob has suspended condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.ConsistentDuration, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("TFJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("TFJob %s is suspended.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - }) - - It("Should delete resources after TFJob is suspended; Should resume TFJob after TFJob is unsuspended", func() { - By("By creating a new TFJob") - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.TFJob{} - chiefPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - chiefSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - - // We'll need to retry getting this newly created TFJob, given that creation may not immediately happen. 
- By("Checking created TFJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - var startTimeBeforeSuspended *metav1.Time - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - startTimeBeforeSuspended = created.Status.StartTime - return startTimeBeforeSuspended - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Checking the created pods and services") - Eventually(func() bool { - errChief := testK8sClient.Get(ctx, chiefKey, chiefPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errChief == nil && errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errChief := testK8sClient.Get(ctx, chiefKey, chiefSvc) - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errChief == nil && errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Updating the pod's phase with Running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, chiefKey, chiefPod)).Should(Succeed()) - chiefPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, chiefPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking the TFJob's condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: 
commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("TFJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("TFJob %s/%s is running.", ns.Name, name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Updating the TFJob with suspend=true") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(true) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the pods and services are removed") - Eventually(func() bool { - errChief := testK8sClient.Get(ctx, chiefKey, chiefPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errors.IsNotFound(errChief) && errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errChief := testK8sClient.Get(ctx, chiefKey, chiefSvc) - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errChief) && errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - errChiefPod := testK8sClient.Get(ctx, chiefKey, chiefPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errChiefSvc := testK8sClient.Get(ctx, chiefKey, chiefSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errChiefPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errChiefSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the TFJob has a suspended condition") - Eventually(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return 
created.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeChief].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeChief].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.TFJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - Expect(created.Status.Conditions).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("TFJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionFalse, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("TFJob %s is suspended.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("TFJob %s is suspended.", name), - Status: corev1.ConditionTrue, - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Unsuspending the TFJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Check if the pods and services are created") - Eventually(func() 
error { - return testK8sClient.Get(ctx, chiefKey, chiefPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, chiefKey, chiefSvc) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerSvc) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - By("Updating Pod's condition with running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, chiefKey, chiefPod)).Should(Succeed()) - chiefPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, chiefPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the TFJob has resumed conditions") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("TFJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobResumedReason), - Message: fmt.Sprintf("TFJob %s is resumed.", name), - Status: corev1.ConditionFalse, - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.TFJobKind, commonutil.JobRunningReason), - Message: 
fmt.Sprintf("TFJob %s/%s is running.", ns.Name, name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Checking if the startTime is updated") - Expect(created.Status.StartTime).ShouldNot(Equal(startTimeBeforeSuspended)) - }) - - It("Should not reconcile a job while managed by external controller", func() { - By("Creating a TFJob managed by external controller") - job.Spec.RunPolicy = kubeflowv1.RunPolicy{ - ManagedBy: ptr.To(kubeflowv1.MultiKueueController), - } - job.Spec.RunPolicy.Suspend = ptr.To(true) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.TFJob{} - By("Checking created TFJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Checking created TFJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - chiefPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - chiefSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - errMasterPod := testK8sClient.Get(ctx, chiefKey, chiefPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, chiefKey, chiefSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue(), "pods and services should be created by external controller (here not existent)") - - By("Checking if the TFJob status was not updated") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return 
created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - - By("Unsuspending the TFJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking created TFJob still has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the TFJob status was not updated, even after unsuspending") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - }) - }) -}) diff --git a/pkg/controller.v1/tensorflow/util.go b/pkg/controller.v1/tensorflow/util.go deleted file mode 100644 index 0b4d3e3be9..0000000000 --- a/pkg/controller.v1/tensorflow/util.go +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tensorflow - -import ( - corev1 "k8s.io/api/core/v1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// GetPortFromTFJob gets the port of tensorflow container. -func GetPortFromTFJob(tfJob *kubeflowv1.TFJob, rtype kubeflowv1.ReplicaType) (int32, error) { - containers := tfJob.Spec.TFReplicaSpecs[rtype].Template.Spec.Containers - for _, container := range containers { - if container.Name == kubeflowv1.TFJobDefaultContainerName { - ports := container.Ports - for _, port := range ports { - if port.Name == kubeflowv1.TFJobDefaultPortName { - return port.ContainerPort, nil - } - } - } - } - return kubeflowv1.TFJobDefaultPort, nil -} - -// ContainsChiefOrMasterSpec returns true if the tfjob contains chief or master spec. -func ContainsChiefOrMasterSpec(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec) bool { - if _, ok := replicas[kubeflowv1.TFJobReplicaTypeChief]; ok { - return true - } else if _, ok := replicas[kubeflowv1.TFJobReplicaTypeMaster]; ok { - return true - } - return false -} - -// originally from pkg/controller.v1/tensorflow/pod.go (deleted) -func getContainerExitCode(pod *corev1.Pod) int32 { - var exitCode int32 = 0xbeef // magic number - for _, status := range pod.Status.ContainerStatuses { - state := status.State - if status.Name == kubeflowv1.TFJobDefaultContainerName && state.Terminated != nil { - exitCode = state.Terminated.ExitCode - } - } - return exitCode -} - -// originally from pkg/controller.v1/tensorflow/pod.go (deleted) -func setRestartPolicy(podTemplateSpec *corev1.PodTemplateSpec, spec *kubeflowv1.ReplicaSpec) { - // This is necessary since restartPolicyExitCode is not supported in v1.PodTemplateSpec - if spec.RestartPolicy == kubeflowv1.RestartPolicyExitCode { - podTemplateSpec.Spec.RestartPolicy = corev1.RestartPolicyNever - } else { - podTemplateSpec.Spec.RestartPolicy = corev1.RestartPolicy(spec.RestartPolicy) - } -} - -// isDistributed returns if the TFJob is a distributed 
training job. -// Ref https://github.com/kubeflow/training-operator/issues/1078. -// originally from pkg/controller.v1/tensorflow/pod.go (deleted) -func isDistributed(tfjob *kubeflowv1.TFJob) bool { - replicas := tfjob.Spec.TFReplicaSpecs - distributionCount := 0 - allTypes := []kubeflowv1.ReplicaType{ - kubeflowv1.TFJobReplicaTypeChief, - kubeflowv1.TFJobReplicaTypeEval, - kubeflowv1.TFJobReplicaTypeMaster, - kubeflowv1.TFJobReplicaTypePS, - kubeflowv1.TFJobReplicaTypeWorker, - } - // Check if there is only one replica. - for _, typ := range allTypes { - if replicas[typ] != nil { - if replicas[typ].Replicas == nil { - distributionCount++ - } else { - distributionCount += int(*replicas[typ].Replicas) - } - } - } - return distributionCount != 1 -} - -// initializeReplicaStatuses initializes the ReplicaStatuses for replica. -// originally from pkg/controller.v1/tensorflow/status.go (deleted) -func initializeReplicaStatuses(jobStatus *kubeflowv1.JobStatus, rtype kubeflowv1.ReplicaType) { - if jobStatus.ReplicaStatuses == nil { - jobStatus.ReplicaStatuses = make(map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus) - } - - jobStatus.ReplicaStatuses[rtype] = &kubeflowv1.ReplicaStatus{} -} - -// updateJobReplicaStatuses updates the JobReplicaStatuses according to the pod. 
-// originally from pkg/controller.v1/tensorflow/status.go (deleted) -func updateJobReplicaStatuses(jobStatus *kubeflowv1.JobStatus, rtype kubeflowv1.ReplicaType, pod *corev1.Pod) { - switch pod.Status.Phase { - case corev1.PodRunning: - jobStatus.ReplicaStatuses[rtype].Active++ - case corev1.PodSucceeded: - jobStatus.ReplicaStatuses[rtype].Succeeded++ - case corev1.PodFailed: - jobStatus.ReplicaStatuses[rtype].Failed++ - } -} diff --git a/pkg/controller.v1/tensorflow/util_test.go b/pkg/controller.v1/tensorflow/util_test.go deleted file mode 100644 index 32f18f5b12..0000000000 --- a/pkg/controller.v1/tensorflow/util_test.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tensorflow - -import ( - "testing" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/uuid" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - tftestutil "github.com/kubeflow/training-operator/pkg/controller.v1/tensorflow/testutil" -) - -func TestGenOwnerReference(t *testing.T) { - testUID := uuid.NewUUID() - tfJob := &kubeflowv1.TFJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: tftestutil.TestTFJobName, - UID: testUID, - }, - } - - ref := reconciler.GenOwnerReference(tfJob) - if ref.UID != testUID { - t.Errorf("Expected UID %s, got %s", testUID, ref.UID) - } - if ref.Name != tftestutil.TestTFJobName { - t.Errorf("Expected Name %s, got %s", tftestutil.TestTFJobName, ref.Name) - } - if ref.APIVersion != kubeflowv1.SchemeGroupVersion.String() { - t.Errorf("Expected APIVersion %s, got %s", kubeflowv1.SchemeGroupVersion.String(), ref.APIVersion) - } -} - -func TestGenLabels(t *testing.T) { - testJobName := "test/key" - expctedVal := "test-key" - - labels := reconciler.GenLabels(testJobName) - jobNameLabel := kubeflowv1.JobNameLabel - - if labels[jobNameLabel] != expctedVal { - t.Errorf("Expected %s %s, got %s", jobNameLabel, expctedVal, jobNameLabel) - } - - if labels[kubeflowv1.OperatorNameLabel] != controllerName { - t.Errorf("Expected %s %s, got %s", kubeflowv1.OperatorNameLabel, controllerName, - labels[kubeflowv1.OperatorNameLabel]) - } -} diff --git a/pkg/controller.v1/xgboost/suite_test.go b/pkg/controller.v1/xgboost/suite_test.go deleted file mode 100644 index 6a61611c5b..0000000000 --- a/pkg/controller.v1/xgboost/suite_test.go +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package xgboost - -import ( - "context" - "crypto/tls" - "fmt" - "net" - "testing" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - "k8s.io/client-go/kubernetes/scheme" - "path/filepath" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - xgboostwebhook "github.com/kubeflow/training-operator/pkg/webhooks/xgboost" - //+kubebuilder:scaffold:imports -) - -// These tests use Ginkgo (BDD-style Go testing framework). Refer to -// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
- -var ( - testK8sClient client.Client - testEnv *envtest.Environment - testCtx context.Context - testCancel context.CancelFunc -) - -func TestAPIs(t *testing.T) { - RegisterFailHandler(Fail) - - RunSpecs(t, "Controller Suite") -} - -var _ = BeforeSuite(func() { - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - - testCtx, testCancel = context.WithCancel(context.TODO()) - - By("bootstrapping test environment") - testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "manifests", "base", "crds")}, - ErrorIfCRDPathMissing: true, - WebhookInstallOptions: envtest.WebhookInstallOptions{ - Paths: []string{filepath.Join("..", "..", "..", "manifests", "base", "webhook", "manifests.yaml")}, - }, - } - - cfg, err := testEnv.Start() - Expect(err).NotTo(HaveOccurred()) - Expect(cfg).NotTo(BeNil()) - - err = v1beta1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - err = kubeflowv1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - //+kubebuilder:scaffold:scheme - - testK8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) - Expect(err).NotTo(HaveOccurred()) - Expect(testK8sClient).NotTo(BeNil()) - - mgr, err := ctrl.NewManager(cfg, ctrl.Options{ - Metrics: metricsserver.Options{ - BindAddress: "0", - }, - WebhookServer: webhook.NewServer( - webhook.Options{ - Host: testEnv.WebhookInstallOptions.LocalServingHost, - Port: testEnv.WebhookInstallOptions.LocalServingPort, - CertDir: testEnv.WebhookInstallOptions.LocalServingCertDir, - }), - }) - Expect(err).NotTo(HaveOccurred()) - - gangSchedulingSetupFunc := common.GenNonGangSchedulerSetupFunc() - r := NewReconciler(mgr, gangSchedulingSetupFunc) - - Expect(r.SetupWithManager(mgr, 1)).NotTo(HaveOccurred()) - Expect(xgboostwebhook.SetupWebhook(mgr)).NotTo(HaveOccurred()) - - go func() { - defer GinkgoRecover() - err = mgr.Start(testCtx) - Expect(err).ToNot(HaveOccurred(), "failed to run manager") - }() - - dialer := 
&net.Dialer{Timeout: time.Second} - addrPort := fmt.Sprintf("%s:%d", testEnv.WebhookInstallOptions.LocalServingHost, testEnv.WebhookInstallOptions.LocalServingPort) - Eventually(func(g Gomega) { - conn, err := tls.DialWithDialer(dialer, "tcp", addrPort, &tls.Config{InsecureSkipVerify: true}) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(conn.Close()).NotTo(HaveOccurred()) - }).Should(Succeed()) -}) - -var _ = AfterSuite(func() { - By("tearing down the test environment") - testCancel() - err := testEnv.Stop() - Expect(err).NotTo(HaveOccurred()) -}) diff --git a/pkg/controller.v1/xgboost/xgboost.go b/pkg/controller.v1/xgboost/xgboost.go deleted file mode 100644 index 37d5e7d4d8..0000000000 --- a/pkg/controller.v1/xgboost/xgboost.go +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License - -package xgboost - -import ( - "fmt" - "strconv" - "strings" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// SetPodEnv sets the pod env set for: -// - XGBoost Rabit Tracker and worker -// - LightGBM master and workers -func SetPodEnv(job interface{}, podTemplate *corev1.PodTemplateSpec, rtype, index string) error { - xgboostjob, ok := job.(*kubeflowv1.XGBoostJob) - if !ok { - return fmt.Errorf("%+v is not a type of XGBoostJob", xgboostjob) - } - - rank, err := strconv.Atoi(index) - if err != nil { - return err - } - - // Add master offset for worker pods - if strings.EqualFold(strings.ToLower(rtype), strings.ToLower(string(kubeflowv1.XGBoostJobReplicaTypeWorker))) { - masterSpec := xgboostjob.Spec.XGBReplicaSpecs[kubeflowv1.XGBoostJobReplicaTypeMaster] - masterReplicas := int(*masterSpec.Replicas) - rank += masterReplicas - } - - masterAddr := replicaName(xgboostjob.Name, kubeflowv1.XGBoostJobReplicaTypeMaster, 0) - - masterPort, err := getPortFromXGBoostJob(xgboostjob, kubeflowv1.XGBoostJobReplicaTypeMaster) - if err != nil { - return err - } - - totalReplicas := computeTotalReplicas(xgboostjob) - - var workerPort int32 - var workerAddrs []string - - if totalReplicas > 1 { - workerPortTemp, err := getPortFromXGBoostJob(xgboostjob, kubeflowv1.XGBoostJobReplicaTypeWorker) - if err != nil { - return err - } - workerPort = workerPortTemp - workerAddrs = make([]string, totalReplicas-1) - for i := range workerAddrs { - workerAddrs[i] = replicaName(xgboostjob.Name, kubeflowv1.XGBoostJobReplicaTypeWorker, i) - } - } - - for i := range podTemplate.Spec.Containers { - if len(podTemplate.Spec.Containers[i].Env) == 0 { - podTemplate.Spec.Containers[i].Env = make([]corev1.EnvVar, 0) - } - podTemplate.Spec.Containers[i].Env = 
append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "MASTER_PORT", - Value: strconv.Itoa(int(masterPort)), - }) - podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "MASTER_ADDR", - Value: masterAddr, - }) - podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "WORLD_SIZE", - Value: strconv.Itoa(int(totalReplicas)), - }) - podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "RANK", - Value: strconv.Itoa(rank), - }) - podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "PYTHONUNBUFFERED", - Value: "1", - }) - // This variables are used if it is a LightGBM job - if totalReplicas > 1 { - podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "WORKER_PORT", - Value: strconv.Itoa(int(workerPort)), - }) - podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ - Name: "WORKER_ADDRS", - Value: strings.Join(workerAddrs, ","), - }) - } - } - - return nil -} - -func replicaName(jobName string, rtype kubeflowv1.ReplicaType, index int) string { - n := jobName + "-" + strings.ToLower(string(rtype)) + "-" + strconv.Itoa(index) - return strings.Replace(n, "/", "-", -1) -} - -// getPortFromXGBoostJob gets the port of xgboost container. 
-func getPortFromXGBoostJob(job *kubeflowv1.XGBoostJob, rtype kubeflowv1.ReplicaType) (int32, error) { - containers := job.Spec.XGBReplicaSpecs[rtype].Template.Spec.Containers - for _, container := range containers { - if container.Name == kubeflowv1.XGBoostJobDefaultContainerName { - ports := container.Ports - for _, port := range ports { - if port.Name == kubeflowv1.XGBoostJobDefaultPortName { - return port.ContainerPort, nil - } - } - } - } - return -1, fmt.Errorf("failed to found the port") -} - -func computeTotalReplicas(obj metav1.Object) int32 { - job := obj.(*kubeflowv1.XGBoostJob) - jobReplicas := int32(0) - - if len(job.Spec.XGBReplicaSpecs) == 0 { - return jobReplicas - } - for _, r := range job.Spec.XGBReplicaSpecs { - if r.Replicas == nil { - continue - } else { - jobReplicas += *r.Replicas - } - } - return jobReplicas -} diff --git a/pkg/controller.v1/xgboost/xgboostjob_controller.go b/pkg/controller.v1/xgboost/xgboostjob_controller.go deleted file mode 100644 index 5756b787b8..0000000000 --- a/pkg/controller.v1/xgboost/xgboostjob_controller.go +++ /dev/null @@ -1,467 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package xgboost - -import ( - "context" - "fmt" - "reflect" - "time" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" - "github.com/kubeflow/training-operator/pkg/common/util" - "github.com/kubeflow/training-operator/pkg/controller.v1/common" - "github.com/kubeflow/training-operator/pkg/controller.v1/control" - "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" - commonutil "github.com/kubeflow/training-operator/pkg/util" - - "github.com/go-logr/logr" - "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/client-go/informers" - kubeclientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/record" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - "sigs.k8s.io/controller-runtime/pkg/source" - schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - "volcano.sh/apis/pkg/apis/scheduling/v1beta1" -) - -const ( - controllerName = "xgboostjob-controller" - - // Reasons for job events. 
- FailedDeleteJobReason = "FailedDeleteJob" - SuccessfulDeleteJobReason = "SuccessfulDeleteJob" -) - -// NewReconciler creates a XGBoostJob Reconciler -func NewReconciler(mgr manager.Manager, gangSchedulingSetupFunc common.GangSchedulingSetupFunc) *XGBoostJobReconciler { - r := &XGBoostJobReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - recorder: mgr.GetEventRecorderFor(controllerName), - apiReader: mgr.GetAPIReader(), - Log: ctrl.Log.WithName("controllers").WithName(kubeflowv1.XGBoostJobKind), - } - - // Create clients - cfg := mgr.GetConfig() - kubeClientSet := kubeclientset.NewForConfigOrDie(cfg) - sharedInformers := informers.NewSharedInformerFactory(kubeClientSet, 0) - priorityClassInformer := sharedInformers.Scheduling().V1().PriorityClasses() - - // Initialize common job controller - r.JobController = common.JobController{ - Controller: r, - Expectations: expectation.NewControllerExpectations(), - WorkQueue: &util.FakeWorkQueue[string]{}, - Recorder: r.recorder, - KubeClientSet: kubeClientSet, - PriorityClassLister: priorityClassInformer.Lister(), - PriorityClassInformerSynced: priorityClassInformer.Informer().HasSynced, - PodControl: control.RealPodControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - ServiceControl: control.RealServiceControl{KubeClient: kubeClientSet, Recorder: r.recorder}, - } - - gangSchedulingSetupFunc(&r.JobController) - - return r -} - -// XGBoostJobReconciler reconciles a XGBoostJob object -type XGBoostJobReconciler struct { - common.JobController - client.Client - Log logr.Logger - Scheme *runtime.Scheme - recorder record.EventRecorder - apiReader client.Reader -} - -// +kubebuilder:rbac:groups=kubeflow.org,resources=xgboostjobs,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=kubeflow.org,resources=xgboostjobs/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=kubeflow.org,resources=xgboostjobs/finalizers,verbs=update -// 
+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;delete -// +kubebuilder:rbac:groups=scheduling.volcano.sh,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=scheduling.x-k8s.io,resources=podgroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch;delete - -// Reconcile reads that state of the cluster for a XGBoostJob object and makes changes based on the state read -// and what is in the XGBoostJob.Spec -// Automatically generate RBAC rules to allow the Controller to read and write Deployments -func (r *XGBoostJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - logger := r.Log.WithValues(kubeflowv1.XGBoostJobSingular, req.NamespacedName) - - xgboostjob := &kubeflowv1.XGBoostJob{} - err := r.Get(ctx, req.NamespacedName, xgboostjob) - if err != nil { - logger.Info(err.Error(), "unable to fetch XGBoostJob", req.NamespacedName.String()) - // Object not found, return. Created objects are automatically garbage collected. - // For additional cleanup logic use finalizers. - return ctrl.Result{}, client.IgnoreNotFound(err) - } - - if manager := r.ManagedByExternalController(xgboostjob.Spec.RunPolicy.ManagedBy); manager != nil { - logger.Info("Skipping XGBoostJob managed by a custom controller", "managed-by", manager) - return ctrl.Result{}, nil - } - - // Check reconcile is required. 
- jobKey, err := common.KeyFunc(xgboostjob) - if err != nil { - utilruntime.HandleError(fmt.Errorf("couldn't get jobKey for job object %#v: %v", xgboostjob, err)) - } - - replicaTypes := util.GetReplicaTypes(xgboostjob.Spec.XGBReplicaSpecs) - needSync := util.SatisfiedExpectations(r.Expectations, jobKey, replicaTypes) - - if !needSync || xgboostjob.GetDeletionTimestamp() != nil { - logger.Info("reconcile cancelled, job does not need to do reconcile or has been deleted", - "sync", needSync, "deleted", xgboostjob.GetDeletionTimestamp() != nil) - return reconcile.Result{}, nil - } - - // Set default priorities for xgboost job - r.Scheme.Default(xgboostjob) - - // Use common to reconcile the job related pod and service - err = r.ReconcileJobs(xgboostjob, xgboostjob.Spec.XGBReplicaSpecs, xgboostjob.Status, &xgboostjob.Spec.RunPolicy) - if err != nil { - logger.V(1).Error(err, "Reconcile XGBoost Job error") - return ctrl.Result{}, err - } - - t, err := util.DurationUntilExpireTime(&xgboostjob.Spec.RunPolicy, xgboostjob.Status) - if err != nil { - logrus.Warnf("Reconcile XGBoost Job error %v", err) - return ctrl.Result{}, err - } - if t >= 0 { - return ctrl.Result{Requeue: true, RequeueAfter: t}, nil - } - - return reconcile.Result{}, nil -} - -// SetupWithManager sets up the controller with the Manager. 
-func (r *XGBoostJobReconciler) SetupWithManager(mgr ctrl.Manager, controllerThreads int) error { - c, err := controller.New(r.ControllerName(), mgr, controller.Options{ - Reconciler: r, - MaxConcurrentReconciles: controllerThreads, - }) - if err != nil { - return err - } - // using onOwnerCreateFunc is easier to set defaults - if err = c.Watch(source.Kind[*kubeflowv1.XGBoostJob](mgr.GetCache(), &kubeflowv1.XGBoostJob{}, - &handler.TypedEnqueueRequestForObject[*kubeflowv1.XGBoostJob]{}, - predicate.TypedFuncs[*kubeflowv1.XGBoostJob]{CreateFunc: r.onOwnerCreateFunc()}), - ); err != nil { - return err - } - // inject watching for job related pod - if err = c.Watch(source.Kind[*corev1.Pod](mgr.GetCache(), &corev1.Pod{}, - handler.TypedEnqueueRequestForOwner[*corev1.Pod](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.XGBoostJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Pod](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // inject watching for job related service - if err = c.Watch(source.Kind[*corev1.Service](mgr.GetCache(), &corev1.Service{}, - handler.TypedEnqueueRequestForOwner[*corev1.Service](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.XGBoostJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*corev1.Service](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - // skip watching volcano PodGroup if volcano PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: v1beta1.GroupName, Kind: "PodGroup"}, - v1beta1.SchemeGroupVersion.Version); err == nil { - // inject watching for job related volcano PodGroup - if err = c.Watch(source.Kind[*v1beta1.PodGroup](mgr.GetCache(), &v1beta1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*v1beta1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.XGBoostJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*v1beta1.PodGroup](r.Scheme, r.Expectations, 
&r.JobController))); err != nil { - return err - } - } - // skip watching scheduler-plugins PodGroup if scheduler-plugins PodGroup is not installed - if _, err = mgr.GetRESTMapper().RESTMapping(schema.GroupKind{Group: schedulerpluginsv1alpha1.SchemeGroupVersion.Group, Kind: "PodGroup"}, - schedulerpluginsv1alpha1.SchemeGroupVersion.Version); err == nil { - // inject watching for job related scheduler-plugins PodGroup - if err = c.Watch(source.Kind[*schedulerpluginsv1alpha1.PodGroup](mgr.GetCache(), &schedulerpluginsv1alpha1.PodGroup{}, - handler.TypedEnqueueRequestForOwner[*schedulerpluginsv1alpha1.PodGroup](mgr.GetScheme(), mgr.GetRESTMapper(), &kubeflowv1.XGBoostJob{}, handler.OnlyControllerOwner()), - util.OnDependentFuncs[*schedulerpluginsv1alpha1.PodGroup](r.Scheme, r.Expectations, &r.JobController))); err != nil { - return err - } - } - return nil -} - -func (r *XGBoostJobReconciler) ControllerName() string { - return controllerName -} - -func (r *XGBoostJobReconciler) GetAPIGroupVersionKind() schema.GroupVersionKind { - return kubeflowv1.GroupVersion.WithKind(kubeflowv1.XGBoostJobKind) -} - -func (r *XGBoostJobReconciler) GetAPIGroupVersion() schema.GroupVersion { - return kubeflowv1.GroupVersion -} - -func (r *XGBoostJobReconciler) GetGroupNameLabelValue() string { - return kubeflowv1.GroupVersion.Group -} - -func (r *XGBoostJobReconciler) GetFrameworkName() string { - return kubeflowv1.XGBoostJobFrameworkName -} - -// GetJobFromInformerCache returns the Job from Informer Cache -func (r *XGBoostJobReconciler) GetJobFromInformerCache(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.XGBoostJob{} - // Default reader for XGBoostJob is cache reader. 
- err := r.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if errors.IsNotFound(err) { - r.Log.Error(err, "xgboost job not found", "namespace", namespace, "name", name) - } else { - r.Log.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -// GetJobFromAPIClient returns the Job from API server -func (r *XGBoostJobReconciler) GetJobFromAPIClient(namespace, name string) (metav1.Object, error) { - job := &kubeflowv1.XGBoostJob{} - - err := r.apiReader.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: name}, job) - if err != nil { - if errors.IsNotFound(err) { - r.Log.Error(err, "xgboost job not found", "namespace", namespace, "name", name) - } else { - r.Log.Error(err, "failed to get job from api-server", "namespace", namespace, "name", name) - } - return nil, err - } - return job, nil -} - -// GetPodsForJob returns the pods managed by the job. This can be achieved by selecting pods using label key "job-name" -// i.e. all pods created by the job will come with label "job-name" = -func (r *XGBoostJobReconciler) GetPodsForJob(obj interface{}) ([]*corev1.Pod, error) { - job, err := meta.Accessor(obj) - if err != nil { - return nil, err - } - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. - podlist := &corev1.PodList{} - err = r.List(context.Background(), podlist, client.MatchingLabels(r.GenLabels(job.GetName())), client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - return util.JobControlledPodList(podlist.Items, job), nil -} - -// GetServicesForJob returns the services managed by the job. This can be achieved by selecting services using label key "job-name" -// i.e. 
all services created by the job will come with label "job-name" = -func (r *XGBoostJobReconciler) GetServicesForJob(obj interface{}) ([]*corev1.Service, error) { - job, err := meta.Accessor(obj) - if err != nil { - return nil, fmt.Errorf("%+v is not a type of XGBoostJob", job) - } - // List all pods to include those that don't match the selector anymore - // but have a ControllerRef pointing to this controller. - serviceList := &corev1.ServiceList{} - err = r.List(context.Background(), serviceList, client.MatchingLabels(r.GenLabels(job.GetName())), client.InNamespace(job.GetNamespace())) - if err != nil { - return nil, err - } - - ret := util.ConvertServiceList(serviceList.Items) - return ret, nil -} - -// DeleteJob deletes the job -func (r *XGBoostJobReconciler) DeleteJob(job interface{}) error { - xgboostjob, ok := job.(*kubeflowv1.XGBoostJob) - if !ok { - return fmt.Errorf("%+v is not a type of XGBoostJob", xgboostjob) - } - if err := r.Delete(context.Background(), xgboostjob); err != nil { - r.recorder.Eventf(xgboostjob, corev1.EventTypeWarning, FailedDeleteJobReason, "Error deleting: %v", err) - r.Log.Error(err, "failed to delete job", "namespace", xgboostjob.Namespace, "name", xgboostjob.Name) - return err - } - r.recorder.Eventf(xgboostjob, corev1.EventTypeNormal, SuccessfulDeleteJobReason, "Deleted job: %v", xgboostjob.Name) - r.Log.Info("job deleted", "namespace", xgboostjob.Namespace, "name", xgboostjob.Name) - trainingoperatorcommon.DeletedJobsCounterInc(xgboostjob.Namespace, r.GetFrameworkName()) - return nil -} - -// UpdateJobStatus updates the job status and job conditions -func (r *XGBoostJobReconciler) UpdateJobStatus(job interface{}, replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, jobStatus *kubeflowv1.JobStatus) error { - xgboostJob, ok := job.(*kubeflowv1.XGBoostJob) - if !ok { - return fmt.Errorf("%+v is not a type of xgboostJob", xgboostJob) - } - - xgboostJobKey, err := common.KeyFunc(xgboostJob) - if err != nil { - 
utilruntime.HandleError(fmt.Errorf("couldn't get key for xgboostjob object %#v: %v", xgboostJob, err)) - return err - } - - logger := commonutil.LoggerForJob(xgboostJob) - - // Set StartTime. - if jobStatus.StartTime == nil { - now := metav1.Now() - jobStatus.StartTime = &now - // enqueue a sync to check if job past ActiveDeadlineSeconds - if xgboostJob.Spec.RunPolicy.ActiveDeadlineSeconds != nil { - logger.Infof("Job with ActiveDeadlineSeconds will sync after %d seconds", *xgboostJob.Spec.RunPolicy.ActiveDeadlineSeconds) - r.WorkQueue.AddAfter(xgboostJobKey, time.Duration(*xgboostJob.Spec.RunPolicy.ActiveDeadlineSeconds)*time.Second) - } - } - - for rtype, spec := range replicas { - status := jobStatus.ReplicaStatuses[rtype] - - succeeded := status.Succeeded - expected := *(spec.Replicas) - succeeded - running := status.Active - failed := status.Failed - runningMsg := fmt.Sprintf("XGBoostJob %s is running.", xgboostJob.Name) - - logrus.Infof("XGBoostJob=%s, ReplicaType=%s expected=%d, running=%d, succeeded=%d , failed=%d", - xgboostJob.Name, rtype, expected, running, succeeded, failed) - - if rtype == kubeflowv1.XGBoostJobReplicaTypeMaster { - if running > 0 { - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobRunningReason), runningMsg) - } - // when master is succeed, the job is finished. 
- if expected == 0 { - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobRunningReason), runningMsg) - msg := fmt.Sprintf("XGBoostJob %s is successfully completed.", xgboostJob.Name) - logrus.Info(msg) - r.Recorder.Event(xgboostJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobSucceededReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobSucceeded, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobSucceededReason), msg) - trainingoperatorcommon.SuccessfulJobsCounterInc(xgboostJob.Namespace, r.GetFrameworkName()) - return nil - } - } - if failed > 0 { - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRunning, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobRunningReason), runningMsg) - if spec.RestartPolicy == kubeflowv1.RestartPolicyExitCode { - msg := fmt.Sprintf("XGBoostJob %s is restarting because %d %s replica(s) failed.", xgboostJob.Name, failed, rtype) - r.Recorder.Event(xgboostJob, corev1.EventTypeWarning, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobRestartingReason), msg) - commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobRestarting, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobRestartingReason), msg) - trainingoperatorcommon.RestartedJobsCounterInc(xgboostJob.Namespace, r.GetFrameworkName()) - } else { - msg := fmt.Sprintf("XGBoostJob %s is failed because %d %s replica(s) failed.", xgboostJob.Name, failed, rtype) - r.Recorder.Event(xgboostJob, corev1.EventTypeNormal, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobFailedReason), msg) - if jobStatus.CompletionTime == nil { - now := metav1.Now() - jobStatus.CompletionTime = &now - } - 
commonutil.UpdateJobConditions(jobStatus, kubeflowv1.JobFailed, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobFailedReason), msg) - trainingoperatorcommon.FailedJobsCounterInc(xgboostJob.Namespace, r.GetFrameworkName()) - } - } - } - return nil -} - -// UpdateJobStatusInApiServer updates the job status in to cluster. -func (r *XGBoostJobReconciler) UpdateJobStatusInApiServer(job interface{}, jobStatus *kubeflowv1.JobStatus) error { - if jobStatus.ReplicaStatuses == nil { - jobStatus.ReplicaStatuses = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus{} - } - - xgboostjob, ok := job.(*kubeflowv1.XGBoostJob) - if !ok { - return fmt.Errorf("%+v is not a type of XGBoostJob", xgboostjob) - } - - // Job status passed in differs with status in job, update in basis of the passed in one. - if !reflect.DeepEqual(&xgboostjob.Status, jobStatus) { - xgboostjob = xgboostjob.DeepCopy() - xgboostjob.Status = *jobStatus.DeepCopy() - } - - result := r.Status().Update(context.Background(), xgboostjob) - - if result != nil { - commonutil.LoggerForJob(xgboostjob).Error(result, "failed to update XGBoost Job conditions in the API server") - return result - } - - return nil -} - -// SetClusterSpec sets the cluster spec for the pod -func (r *XGBoostJobReconciler) SetClusterSpec(job interface{}, podTemplate *corev1.PodTemplateSpec, rtype, index string) error { - return SetPodEnv(job, podTemplate, rtype, index) -} - -func (r *XGBoostJobReconciler) GetDefaultContainerName() string { - return kubeflowv1.XGBoostJobDefaultContainerName -} - -func (r *XGBoostJobReconciler) GetDefaultContainerPortName() string { - return kubeflowv1.XGBoostJobDefaultPortName -} - -func (r *XGBoostJobReconciler) IsMasterRole(replicas map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec, - rtype kubeflowv1.ReplicaType, index int) bool { - return string(rtype) == string(kubeflowv1.XGBoostJobReplicaTypeMaster) -} - -// onOwnerCreateFunc modify creation condition. 
-func (r *XGBoostJobReconciler) onOwnerCreateFunc() func(createEvent event.TypedCreateEvent[*kubeflowv1.XGBoostJob]) bool { - return func(e event.TypedCreateEvent[*kubeflowv1.XGBoostJob]) bool { - xgboostJob := e.Object - r.Scheme.Default(xgboostJob) - msg := fmt.Sprintf("XGBoostJob %s is created.", e.Object.GetName()) - logrus.Info() - trainingoperatorcommon.CreatedJobsCounterInc(xgboostJob.Namespace, r.GetFrameworkName()) - commonutil.UpdateJobConditions(&xgboostJob.Status, kubeflowv1.JobCreated, corev1.ConditionTrue, commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobCreatedReason), msg) - return true - } -} diff --git a/pkg/controller.v1/xgboost/xgboostjob_controller_test.go b/pkg/controller.v1/xgboost/xgboostjob_controller_test.go deleted file mode 100644 index b1a1fce9c6..0000000000 --- a/pkg/controller.v1/xgboost/xgboostjob_controller_test.go +++ /dev/null @@ -1,433 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package xgboost - -import ( - "context" - "fmt" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - commonutil "github.com/kubeflow/training-operator/pkg/util" - "github.com/kubeflow/training-operator/pkg/util/testutil" -) - -var _ = Describe("XGBoost controller", func() { - // Define utility constants for object names. - const ( - expectedPort = int32(9999) - ) - Context("When creating the XGBoostJob", func() { - const name = "test-job" - var ( - ns *corev1.Namespace - job *kubeflowv1.XGBoostJob - jobKey types.NamespacedName - masterKey types.NamespacedName - worker0Key types.NamespacedName - ctx = context.Background() - ) - BeforeEach(func() { - ns = &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "xgboost-test-", - }, - } - Expect(testK8sClient.Create(ctx, ns)).Should(Succeed()) - - job = newXGBoostForTest(name, ns.Name) - jobKey = client.ObjectKeyFromObject(job) - masterKey = types.NamespacedName{ - Name: fmt.Sprintf("%s-master-0", name), - Namespace: ns.Name, - } - worker0Key = types.NamespacedName{ - Name: fmt.Sprintf("%s-worker-0", name), - Namespace: ns.Name, - } - job.Spec.XGBReplicaSpecs = map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaSpec{ - kubeflowv1.XGBoostJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Image: "test-image", - Name: kubeflowv1.XGBoostJobDefaultContainerName, - Ports: []corev1.ContainerPort{ - { - Name: kubeflowv1.XGBoostJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP, - }, - }, - }, - }, - }, - }, - }, - kubeflowv1.XGBoostJobReplicaTypeWorker: { - Replicas: ptr.To[int32](2), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: 
[]corev1.Container{ - { - Image: "test-image", - Name: kubeflowv1.XGBoostJobDefaultContainerName, - Ports: []corev1.ContainerPort{ - { - Name: kubeflowv1.XGBoostJobDefaultPortName, - ContainerPort: expectedPort, - Protocol: corev1.ProtocolTCP, - }, - }, - }, - }, - }, - }, - }, - } - }) - AfterEach(func() { - Expect(testK8sClient.Delete(ctx, job)).Should(Succeed()) - Expect(testK8sClient.Delete(ctx, ns)).Should(Succeed()) - }) - It("Shouldn't create resources if XGBoostJob is suspended", func() { - By("By creating a new XGBoostJob with suspend=true") - job.Spec.RunPolicy.Suspend = ptr.To(true) - job.Spec.XGBReplicaSpecs[kubeflowv1.XGBoostJobReplicaTypeWorker].Replicas = ptr.To[int32](1) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.XGBoostJob{} - masterPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - masterSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - - By("Checking created XGBoostJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - By("Checking created XGBoostJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - errMasterPod := testK8sClient.Get(ctx, masterKey, masterPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the XGBoostJob has suspended condition") - 
Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.ConsistentDuration, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("XGBoostJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("XGBoostJob %s is suspended.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - }) - - It("Should delete resources after XGBoostJob is suspended; Should resume XGBoostJob after XGBoostJob is unsuspended", func() { - By("By creating a new XGBoostJob") - job.Spec.XGBReplicaSpecs[kubeflowv1.XGBoostJobReplicaTypeWorker].Replicas = ptr.To[int32](1) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.XGBoostJob{} - masterPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - masterSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - - // We'll need to retry getting this newly created XGBoostJob, given that creation may not immediately happen. 
- By("Checking created XGBoostJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - var startTimeBeforeSuspended *metav1.Time - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - startTimeBeforeSuspended = created.Status.StartTime - return startTimeBeforeSuspended - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Checking the created pods and services") - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errMaster == nil && errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errMaster == nil && errWorker == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Updating the Pod's phase with Running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, masterKey, masterPod)).Should(Succeed()) - masterPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, masterPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking the XGBoostJob's condition") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: 
commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("XGBoostJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("XGBoostJob %s is running.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Updating the XGBoostJob with suspend=true") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(true) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the pods and services are removed") - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterPod) - errWorker := testK8sClient.Get(ctx, worker0Key, workerPod) - return errors.IsNotFound(errMaster) && errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Eventually(func() bool { - errMaster := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorker := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMaster) && errors.IsNotFound(errWorker) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - errMasterPod := testK8sClient.Get(ctx, masterKey, masterPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - - By("Checking if the XGBoostJob has a suspended condition") - Eventually(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return 
created.Status.ReplicaStatuses[kubeflowv1.XGBoostJobReplicaTypeMaster].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.XGBoostJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - Consistently(func() bool { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.ReplicaStatuses[kubeflowv1.XGBoostJobReplicaTypeMaster].Active == 0 && - created.Status.ReplicaStatuses[kubeflowv1.XGBoostJobReplicaTypeWorker].Active == 0 && - created.Status.StartTime.Equal(startTimeBeforeSuspended) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue()) - Expect(created.Status.Conditions).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("XGBoostJob %s is created.", name), - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionFalse, - Reason: commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("XGBoostJob %s is suspended.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobSuspendedReason), - Message: fmt.Sprintf("XGBoostJob %s is suspended.", name), - Status: corev1.ConditionTrue, - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Unsuspending the XGBoostJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.Timeout, testutil.Interval).ShouldNot(BeNil()) - - By("Check if 
the pods and services are created") - Eventually(func() error { - return testK8sClient.Get(ctx, masterKey, masterPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerPod) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, masterKey, masterSvc) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - Eventually(func() error { - return testK8sClient.Get(ctx, worker0Key, workerSvc) - }, testutil.Timeout, testutil.Interval).Should(BeNil()) - - By("Updating Pod's condition with Running") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, masterKey, masterPod)).Should(Succeed()) - masterPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, masterPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - Eventually(func() error { - Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed()) - workerPod.Status.Phase = corev1.PodRunning - return testK8sClient.Status().Update(ctx, workerPod) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking if the XGBoostJob has resumed conditions") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{ - { - Type: kubeflowv1.JobCreated, - Status: corev1.ConditionTrue, - Reason: commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobCreatedReason), - Message: fmt.Sprintf("XGBoostJob %s is created.", name), - }, - { - Type: kubeflowv1.JobSuspended, - Reason: commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobResumedReason), - Message: fmt.Sprintf("XGBoostJob %s is resumed.", name), - Status: corev1.ConditionFalse, - }, - { - Type: kubeflowv1.JobRunning, - Status: corev1.ConditionTrue, - Reason: 
commonutil.NewReason(kubeflowv1.XGBoostJobKind, commonutil.JobRunningReason), - Message: fmt.Sprintf("XGBoostJob %s is running.", name), - }, - }, testutil.IgnoreJobConditionsTimes)) - - By("Checking if the startTime is updated") - Expect(created.Status.StartTime).ShouldNot(Equal(startTimeBeforeSuspended)) - }) - - It("Should not reconcile a job while managed by external controller", func() { - By("Creating a XGBoostJob managed by external controller") - job.Spec.RunPolicy = kubeflowv1.RunPolicy{ - ManagedBy: ptr.To(kubeflowv1.MultiKueueController), - } - job.Spec.RunPolicy.Suspend = ptr.To(true) - Expect(testK8sClient.Create(ctx, job)).Should(Succeed()) - - created := &kubeflowv1.XGBoostJob{} - By("Checking created XGBoostJob") - Eventually(func() bool { - err := testK8sClient.Get(ctx, jobKey, created) - return err == nil - }, testutil.Timeout, testutil.Interval).Should(BeTrue()) - - By("Checking created XGBoostJob has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the pods and services aren't created") - Consistently(func() bool { - masterPod := &corev1.Pod{} - workerPod := &corev1.Pod{} - masterSvc := &corev1.Service{} - workerSvc := &corev1.Service{} - errMasterPod := testK8sClient.Get(ctx, masterKey, masterPod) - errWorkerPod := testK8sClient.Get(ctx, worker0Key, workerPod) - errMasterSvc := testK8sClient.Get(ctx, masterKey, masterSvc) - errWorkerSvc := testK8sClient.Get(ctx, worker0Key, workerSvc) - return errors.IsNotFound(errMasterPod) && errors.IsNotFound(errWorkerPod) && - errors.IsNotFound(errMasterSvc) && errors.IsNotFound(errWorkerSvc) - }, testutil.ConsistentDuration, testutil.Interval).Should(BeTrue(), "pods and services should be created by external controller (here not existent)") - - By("Checking if the XGBoostJob status was not updated") - 
Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - - By("Unsuspending the XGBoostJob") - Eventually(func() error { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - created.Spec.RunPolicy.Suspend = ptr.To(false) - return testK8sClient.Update(ctx, created) - }, testutil.Timeout, testutil.Interval).Should(Succeed()) - - By("Checking created XGBoostJob still has a nil startTime") - Consistently(func() *metav1.Time { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.StartTime - }, testutil.ConsistentDuration, testutil.Interval).Should(BeNil()) - - By("Checking if the XGBoostJob status was not updated, even after unsuspending") - Eventually(func() []kubeflowv1.JobCondition { - Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed()) - return created.Status.Conditions - }, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition(nil))) - }) - }) -}) - -func newXGBoostForTest(name, namespace string) *kubeflowv1.XGBoostJob { - return &kubeflowv1.XGBoostJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - } -} diff --git a/pkg/core/job.go b/pkg/core/job.go deleted file mode 100644 index 361cbe313f..0000000000 --- a/pkg/core/job.go +++ /dev/null @@ -1,133 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package core - -import ( - "sort" - "strings" - "time" - - log "github.com/sirupsen/logrus" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/tools/record" -) - -// RecordAbnormalPods records the active pod whose latest condition is not in True status. -func RecordAbnormalPods(activePods []*v1.Pod, object runtime.Object, recorder record.EventRecorder) { - for _, pod := range activePods { - // If the pod starts running, should checks the container statuses rather than the conditions. - recordContainerStatus := func(status *v1.ContainerStatus) { - if status.State.Terminated != nil && status.State.Terminated.ExitCode != 0 { - terminated := status.State.Terminated - recorder.Eventf(object, v1.EventTypeWarning, terminated.Reason, - "Error pod %s container %s exitCode: %d terminated message: %s", - pod.Name, status.Name, terminated.ExitCode, terminated.Message) - } - // The terminated state and waiting state don't simultaneously exists, checks them at the same time. - if status.State.Waiting != nil && status.State.Waiting.Message != "" { - wait := status.State.Waiting - recorder.Eventf(object, v1.EventTypeWarning, wait.Reason, - "Error pod %s container %s waiting message: %s", pod.Name, status.Name, wait.Message) - } - } - if len(pod.Status.ContainerStatuses) != 0 { - for _, status := range pod.Status.ContainerStatuses { - recordContainerStatus(&status) - } - // If the pod has container status info, that means the init container statuses are normal. 
- continue - } - if len(pod.Status.InitContainerStatuses) != 0 { - for _, status := range pod.Status.InitContainerStatuses { - recordContainerStatus(&status) - } - continue - } - if len(pod.Status.Conditions) == 0 { - continue - } - // Should not modify the original pod which is stored in the informer cache. - status := pod.Status.DeepCopy() - sort.Slice(status.Conditions, func(i, j int) bool { - return status.Conditions[i].LastTransitionTime.After(status.Conditions[j].LastTransitionTime.Time) - }) - condition := status.Conditions[0] - if condition.Status == v1.ConditionTrue { - continue - } - recorder.Eventf(object, v1.EventTypeWarning, condition.Reason, "Error pod %s condition message: %s", pod.Name, condition.Message) - } -} - -// PastActiveDeadline checks if job has ActiveDeadlineSeconds field set and if it is exceeded. -func PastActiveDeadline(runPolicy *apiv1.RunPolicy, jobStatus apiv1.JobStatus) bool { - if runPolicy.ActiveDeadlineSeconds == nil || jobStatus.StartTime == nil { - return false - } - now := metav1.Now() - start := jobStatus.StartTime.Time - duration := now.Time.Sub(start) - allowedDuration := time.Duration(*runPolicy.ActiveDeadlineSeconds) * time.Second - return duration >= allowedDuration -} - -// PastBackoffLimit checks if container restartCounts sum exceeds BackoffLimit -// this method applies only to pods when restartPolicy is one of OnFailure, Always or ExitCode -func PastBackoffLimit(jobName string, runPolicy *apiv1.RunPolicy, - replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, pods []*v1.Pod, - podFilterFunc func(pods []*v1.Pod, replicaType string) ([]*v1.Pod, error)) (bool, error) { - if runPolicy.BackoffLimit == nil { - return false, nil - } - result := int32(0) - for rtype, spec := range replicas { - if spec.RestartPolicy != apiv1.RestartPolicyOnFailure && spec.RestartPolicy != apiv1.RestartPolicyAlways && spec.RestartPolicy != apiv1.RestartPolicyExitCode { - log.Warnf("The restart policy of replica %v of the job %v is not OnFailure, 
Always or ExitCode. Not counted in backoff limit.", rtype, jobName) - continue - } - // Convert ReplicaType to lower string. - rt := strings.ToLower(string(rtype)) - pods, err := podFilterFunc(pods, rt) - if err != nil { - return false, err - } - for i := range pods { - po := pods[i] - if po.Status.Phase != v1.PodRunning { - continue - } - for j := range po.Status.InitContainerStatuses { - stat := po.Status.InitContainerStatuses[j] - result += stat.RestartCount - } - for j := range po.Status.ContainerStatuses { - stat := po.Status.ContainerStatuses[j] - result += stat.RestartCount - } - } - } - - if *runPolicy.BackoffLimit == 0 { - return result > 0, nil - } - return result >= *runPolicy.BackoffLimit, nil -} diff --git a/pkg/core/pod.go b/pkg/core/pod.go deleted file mode 100644 index ac327134a4..0000000000 --- a/pkg/core/pod.go +++ /dev/null @@ -1,88 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package core - -import ( - utillabels "github.com/kubeflow/training-operator/pkg/util/labels" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - log "github.com/sirupsen/logrus" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/labels" -) - -// FilterPodsForReplicaType returns pods belong to a replicaType. 
-func FilterPodsForReplicaType(pods []*v1.Pod, replicaType string) ([]*v1.Pod, error) { - var result []*v1.Pod - - selector := labels.SelectorFromValidatedSet(labels.Set{ - apiv1.ReplicaTypeLabel: replicaType, - }) - - for _, pod := range pods { - set := labels.Set(pod.Labels) - if !selector.Matches(set) { - continue - } - result = append(result, pod) - } - return result, nil -} - -// GetPodSlices returns a slice, which element is the slice of pod. -// It gives enough information to caller to make decision to up/down scale resources. -func GetPodSlices(pods []*v1.Pod, replicas int, logger *log.Entry) [][]*v1.Pod { - podSlices := make([][]*v1.Pod, CalculatePodSliceSize(pods, replicas)) - for _, pod := range pods { - index, err := utillabels.ReplicaIndex(pod.Labels) - if err != nil { - logger.Warningf("Error obtaining replica index from Pod %s/%s: %v", pod.Namespace, pod.Name, err) - continue - } - if index < 0 || index >= replicas { - logger.Warningf("The label index is not expected: %d, pod: %s/%s", index, pod.Namespace, pod.Name) - } - - podSlices[index] = append(podSlices[index], pod) - } - return podSlices -} - -// CalculatePodSliceSize compare max pod index with desired replicas and return larger size -func CalculatePodSliceSize(pods []*v1.Pod, replicas int) int { - size := 0 - for _, pod := range pods { - index, err := utillabels.ReplicaIndex(pod.Labels) - if err != nil { - continue - } - size = MaxInt(size, index) - } - - // size comes from index, need to +1 to indicate real size - return MaxInt(size+1, replicas) -} - -// SetRestartPolicy check the RestartPolicy defined in job spec and overwrite RestartPolicy in podTemplate if necessary -func SetRestartPolicy(podTemplateSpec *v1.PodTemplateSpec, spec *apiv1.ReplicaSpec) { - // This is necessary since restartPolicyExitCode is not supported in v1.PodTemplateSpec - if spec.RestartPolicy == apiv1.RestartPolicyExitCode { - podTemplateSpec.Spec.RestartPolicy = v1.RestartPolicyNever - } else { - 
podTemplateSpec.Spec.RestartPolicy = v1.RestartPolicy(spec.RestartPolicy) - } -} diff --git a/pkg/core/service.go b/pkg/core/service.go deleted file mode 100644 index ffafd10634..0000000000 --- a/pkg/core/service.go +++ /dev/null @@ -1,101 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package core - -import ( - "fmt" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - utillabels "github.com/kubeflow/training-operator/pkg/util/labels" - log "github.com/sirupsen/logrus" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/labels" -) - -// FilterServicesForReplicaType returns service belong to a replicaType. -func FilterServicesForReplicaType(services []*v1.Service, replicaType string) ([]*v1.Service, error) { - var result []*v1.Service - - selector := labels.SelectorFromValidatedSet(labels.Set{ - apiv1.ReplicaTypeLabel: replicaType, - }) - - for _, service := range services { - set := labels.Set(service.Labels) - if !selector.Matches(set) { - continue - } - result = append(result, service) - } - return result, nil -} - -// GetServiceSlices returns a slice, which element is the slice of service. -// Assume the return object is serviceSlices, then serviceSlices[i] is an -// array of pointers to services corresponding to Services for replica i. 
-func GetServiceSlices(services []*v1.Service, replicas int, logger *log.Entry) [][]*v1.Service { - serviceSlices := make([][]*v1.Service, CalculateServiceSliceSize(services, replicas)) - for _, service := range services { - index, err := utillabels.ReplicaIndex(service.Labels) - if err != nil { - logger.Warningf("Error obtaining index for service %s/%s: %v", service.Namespace, service.Name, err) - continue - } - if index < 0 || index >= replicas { - logger.Warningf("The label index is not expected: %d, service: %s/%s", index, service.Namespace, service.Name) - } - - serviceSlices[index] = append(serviceSlices[index], service) - } - return serviceSlices -} - -// CalculateServiceSliceSize compare max pod index with desired replicas and return larger size -func CalculateServiceSliceSize(services []*v1.Service, replicas int) int { - size := 0 - for _, svc := range services { - index, err := utillabels.ReplicaIndex(svc.Labels) - if err != nil { - continue - } - size = MaxInt(size, index) - } - - // size comes from index, need to +1 to indicate real size - return MaxInt(size+1, replicas) -} - -// GetPortsFromJob gets the ports of job container. Port could be nil, if distributed communication strategy doesn't need and no other ports that need to be exposed. 
-func GetPortsFromJob(spec *apiv1.ReplicaSpec, defaultContainerName string) (map[string]int32, error) { - ports := make(map[string]int32) - - containers := spec.Template.Spec.Containers - for _, container := range containers { - if container.Name == defaultContainerName { - containerPorts := container.Ports - if len(containerPorts) == 0 { - return nil, nil - } - for _, port := range containerPorts { - ports[port.Name] = port.ContainerPort - } - return ports, nil - } - } - - return nil, fmt.Errorf("failed to find the port") -} diff --git a/pkg/core/status.go b/pkg/core/status.go deleted file mode 100644 index eb9985a868..0000000000 --- a/pkg/core/status.go +++ /dev/null @@ -1,50 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package core - -import ( - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - corev1 "k8s.io/api/core/v1" -) - -// InitializeReplicaStatuses initializes the ReplicaStatuses for replica. -func InitializeReplicaStatuses(jobStatus *apiv1.JobStatus, rtype apiv1.ReplicaType) { - if jobStatus.ReplicaStatuses == nil { - jobStatus.ReplicaStatuses = make(map[apiv1.ReplicaType]*apiv1.ReplicaStatus) - } - - jobStatus.ReplicaStatuses[rtype] = &apiv1.ReplicaStatus{} -} - -// UpdateJobReplicaStatuses updates the JobReplicaStatuses according to the pod. 
-func UpdateJobReplicaStatuses(jobStatus *apiv1.JobStatus, rtype apiv1.ReplicaType, pod *corev1.Pod) { - switch pod.Status.Phase { - case corev1.PodRunning: - if pod.DeletionTimestamp != nil { - // when node is not ready, the pod will be in terminating state. - // Count deleted Pods as failures to account for orphan Pods that - // never have a chance to reach the Failed phase. - jobStatus.ReplicaStatuses[rtype].Failed++ - } else { - jobStatus.ReplicaStatuses[rtype].Active++ - } - case corev1.PodSucceeded: - jobStatus.ReplicaStatuses[rtype].Succeeded++ - case corev1.PodFailed: - jobStatus.ReplicaStatuses[rtype].Failed++ - } -} diff --git a/pkg/core/utils.go b/pkg/core/utils.go deleted file mode 100644 index e969c1d314..0000000000 --- a/pkg/core/utils.go +++ /dev/null @@ -1,33 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package core - -import ( - "strings" -) - -func MaxInt(x, y int) int { - if x < y { - return y - } - return x -} - -func GenGeneralName(jobName string, rtype string, index string) string { - n := jobName + "-" + strings.ToLower(rtype) + "-" + index - return strings.Replace(n, "/", "-", -1) -} diff --git a/pkg/util/counter.go b/pkg/util/counter.go deleted file mode 100644 index 0fb5fa2606..0000000000 --- a/pkg/util/counter.go +++ /dev/null @@ -1,71 +0,0 @@ -package util - -import ( - "fmt" - "sync" -) - -type Counter struct { - lock sync.Mutex - data map[string]int -} - -func NewCounter() *Counter { - return &Counter{ - lock: sync.Mutex{}, - data: map[string]int{}, - } -} - -func (c *Counter) Inc(key string) { - c.lock.Lock() - defer c.lock.Unlock() - - v, ok := c.data[key] - if ok { - c.data[key] = v + 1 - return - } - c.data[key] = 0 -} - -func (c *Counter) DeleteKey(key string) { - c.lock.Lock() - defer c.lock.Lock() - - delete(c.data, key) -} - -func (c *Counter) Counts(key string) (int, error) { - c.lock.Lock() - defer c.lock.Unlock() - - v, ok := c.data[key] - if !ok { - return 0, fmt.Errorf("cannot get key %s", key) - } - var err error = nil - if v < 0 { - err = fmt.Errorf("count %s:%d is negative", key, v) - } - return v, err -} - -func (c *Counter) Dec(key string) error { - c.lock.Lock() - defer c.lock.Unlock() - - v, ok := c.data[key] - if ok { - if v > 1 { - c.data[key] = v - 1 - return nil - } - if v == 1 { - c.DeleteKey(key) - return nil - } - return fmt.Errorf("cannot minus one: key %s has value %d", key, v) - } - return fmt.Errorf("cannot find key %s", key) -} diff --git a/pkg/util/k8sutil/client.go b/pkg/util/k8sutil/client.go deleted file mode 100644 index e08ebfa507..0000000000 --- a/pkg/util/k8sutil/client.go +++ /dev/null @@ -1,101 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package k8sutil - -import ( - "context" - "fmt" - "net/http" - - "github.com/kubeflow/training-operator/pkg/util" - metav1unstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/runtime/serializer" - "k8s.io/client-go/kubernetes/scheme" - "k8s.io/client-go/rest" -) - -// CRDRestClient defines an interface for working with CRDs using the REST client. -// In most cases we want to use the auto-generated clientset for specific CRDs. -// The only exception is when the CRD spec is invalid and we can't parse the type into the corresponding -// go struct. -type CRDClient interface { - // Update a Job. - Update(obj *metav1unstructured.Unstructured) error -} - -// CRDRestClient uses the Kubernetes rest interface to talk to the CRD. -type CRDRestClient struct { - restcli *rest.RESTClient -} - -func NewCRDRestClient(version *schema.GroupVersion) (*CRDRestClient, error) { - config, err := GetClusterConfig() - if err != nil { - return nil, err - } - config.GroupVersion = version - config.APIPath = "/apis" - config.ContentType = runtime.ContentTypeJSON - config.NegotiatedSerializer = serializer.WithoutConversionCodecFactory{CodecFactory: scheme.Codecs} - - restcli, err := rest.RESTClientFor(config) - if err != nil { - return nil, err - } - - cli := &CRDRestClient{ - restcli: restcli, - } - return cli, nil -} - -// HttpClient returns the http client used. 
-func (c *CRDRestClient) Client() *http.Client { - return c.restcli.Client -} - -func (c *CRDRestClient) Update(obj *metav1unstructured.Unstructured, plural string) error { - logger := util.LoggerForUnstructured(obj, obj.GetKind()) - // TODO(jlewi): Can we just call obj.GetKind() to get the kind? I think that will return the singular - // not plural will that work? - if plural == "" { - logger.Errorf("Could not issue update because plural not set.") - return fmt.Errorf("plural must be set") - } - r := c.restcli.Put().Resource(plural).Namespace(obj.GetNamespace()).Name(obj.GetName()).Body(obj) - _, err := r.DoRaw(context.TODO()) - if err != nil { - logger.Errorf("Could not issue update using URL: %v; error; %v", r.URL().String(), err) - } - return err -} - -func (c *CRDRestClient) UpdateStatus(obj *metav1unstructured.Unstructured, plural string) error { - logger := util.LoggerForUnstructured(obj, obj.GetKind()) - if plural == "" { - logger.Errorf("Could not issue update because plural not set.") - return fmt.Errorf("plural must be set") - } - r := c.restcli.Put().Resource(plural).Namespace(obj.GetNamespace()).Name(obj.GetName()).SubResource("status").Body(obj) - _, err := r.DoRaw(context.TODO()) - if err != nil { - logger.Errorf("Could not issue update using URL: %v; error; %v", r.URL().String(), err) - } - return err -} diff --git a/pkg/util/k8sutil/k8sutil.go b/pkg/util/k8sutil/k8sutil.go deleted file mode 100644 index 342d3a6b5f..0000000000 --- a/pkg/util/k8sutil/k8sutil.go +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package k8sutil - -import ( - "net" - "os" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - log "github.com/sirupsen/logrus" - v1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" // for gcp auth - "k8s.io/client-go/rest" - "k8s.io/client-go/tools/clientcmd" -) - -// RecommendedConfigPathEnvVar is a environment variable for path configuration -const RecommendedConfigPathEnvVar = "KUBECONFIG" - -// MustNewKubeClient returns new kubernetes client for cluster configuration -func MustNewKubeClient() kubernetes.Interface { - cfg, err := GetClusterConfig() - if err != nil { - log.Fatal(err) - } - return kubernetes.NewForConfigOrDie(cfg) -} - -// GetClusterConfig obtain the config from the Kube configuration used by kubeconfig, or from k8s cluster. -func GetClusterConfig() (*rest.Config, error) { - if len(os.Getenv(RecommendedConfigPathEnvVar)) > 0 { - // use the current context in kubeconfig - // This is very useful for running locally. 
- return clientcmd.BuildConfigFromFlags("", os.Getenv(RecommendedConfigPathEnvVar)) - } - - // Work around https://github.com/kubernetes/kubernetes/issues/40973 - // See https://github.com/coreos/etcd-operator/issues/731#issuecomment-283804819 - if len(os.Getenv("KUBERNETES_SERVICE_HOST")) == 0 { - addrs, err := net.LookupHost("kubernetes.default.svc") - if err != nil { - panic(err) - } - if err := os.Setenv("KUBERNETES_SERVICE_HOST", addrs[0]); err != nil { - return nil, err - } - } - if len(os.Getenv("KUBERNETES_SERVICE_PORT")) == 0 { - if err := os.Setenv("KUBERNETES_SERVICE_PORT", "443"); err != nil { - panic(err) - } - } - return rest.InClusterConfig() -} - -// IsKubernetesResourceAlreadyExistError throws error when kubernetes resources already exist. -func IsKubernetesResourceAlreadyExistError(err error) bool { - return apierrors.IsAlreadyExists(err) -} - -// IsKubernetesResourceNotFoundError throws error when there is no kubernetes resource found. -func IsKubernetesResourceNotFoundError(err error) bool { - return apierrors.IsNotFound(err) -} - -// TODO(jlewi): CascadeDeletOptions are part of garbage collection policy. -// CascadeDeleteOptions deletes the workload after the grace period -// Do we want to use this? See -// https://kubernetes.io/docs/concepts/workloads/controllers/garbage-collection/ -func CascadeDeleteOptions(gracePeriodSeconds int64) *metav1.DeleteOptions { - return &metav1.DeleteOptions{ - GracePeriodSeconds: func(t int64) *int64 { return &t }(gracePeriodSeconds), - PropagationPolicy: func() *metav1.DeletionPropagation { - foreground := metav1.DeletePropagationForeground - return &foreground - }(), - } -} - -// FilterActivePods returns pods that have not terminated. 
-func FilterActivePods(pods []*v1.Pod) []*v1.Pod { - var result []*v1.Pod - for _, p := range pods { - if IsPodActive(p) { - result = append(result, p) - } else { - log.Infof("Ignoring inactive pod %v/%v in state %v, deletion time %v", - p.Namespace, p.Name, p.Status.Phase, p.DeletionTimestamp) - } - } - return result -} - -func IsPodActive(p *v1.Pod) bool { - return v1.PodSucceeded != p.Status.Phase && - v1.PodFailed != p.Status.Phase && - p.DeletionTimestamp == nil -} - -// filterPodCount returns pods based on their phase. -func FilterPodCount(pods []*v1.Pod, phase v1.PodPhase) int32 { - var result int32 - for i := range pods { - if phase == pods[i].Status.Phase { - result++ - } - } - return result -} - -func GetTotalReplicas(replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) int32 { - jobReplicas := int32(0) - for _, r := range replicas { - if r.Replicas != nil { - jobReplicas += *r.Replicas - } else { - // If unspecified, defaults to 1. - jobReplicas += 1 - } - } - return jobReplicas -} - -func GetTotalFailedReplicas(replicas map[apiv1.ReplicaType]*apiv1.ReplicaStatus) int32 { - totalFailedReplicas := int32(0) - for _, status := range replicas { - totalFailedReplicas += status.Failed - } - return totalFailedReplicas -} diff --git a/pkg/util/labels/labels.go b/pkg/util/labels/labels.go deleted file mode 100644 index 791f20b109..0000000000 --- a/pkg/util/labels/labels.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2021 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package labels - -import ( - "errors" - "strconv" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func ReplicaIndex(labels map[string]string) (int, error) { - v, ok := labels[v1.ReplicaIndexLabel] - if !ok { - return 0, errors.New("replica index label not found") - } - return strconv.Atoi(v) -} - -func SetReplicaIndex(labels map[string]string, idx int) { - SetReplicaIndexStr(labels, strconv.Itoa(idx)) -} - -func SetReplicaIndexStr(labels map[string]string, idx string) { - labels[v1.ReplicaIndexLabel] = idx -} - -func ReplicaType(labels map[string]string) (v1.ReplicaType, error) { - v, ok := labels[v1.ReplicaTypeLabel] - if !ok { - return "", errors.New("replica type label not found") - } - return v1.ReplicaType(v), nil -} - -func SetReplicaType(labels map[string]string, rt string) { - labels[v1.ReplicaTypeLabel] = rt -} - -func HasKnownLabels(labels map[string]string, groupName string) bool { - _, has := labels[v1.OperatorNameLabel] - return has -} - -func SetJobRole(labels map[string]string, role string) { - labels[v1.JobRoleLabel] = role -} diff --git a/pkg/util/labels/labels_test.go b/pkg/util/labels/labels_test.go deleted file mode 100644 index bf42d44e41..0000000000 --- a/pkg/util/labels/labels_test.go +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright 2023 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package labels - -import ( - "testing" - - v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestReplicaIndex(t *testing.T) { - cases := map[string]struct { - labels map[string]string - want int - wantErr bool - }{ - "new": { - labels: map[string]string{ - v1.ReplicaIndexLabel: "2", - }, - want: 2, - }, - "old": { - labels: map[string]string{ - v1.ReplicaIndexLabel: "3", - }, - want: 3, - }, - "none": { - labels: map[string]string{}, - wantErr: true, - }, - "both": { - labels: map[string]string{ - v1.ReplicaIndexLabel: "4", - }, - want: 4, - }, - } - for name, tc := range cases { - t.Run(name, func(t *testing.T) { - got, err := ReplicaIndex(tc.labels) - if gotErr := err != nil; tc.wantErr != gotErr { - t.Errorf("ReplicaIndex returned error (%t) want (%t)", gotErr, tc.wantErr) - } - if got != tc.want { - t.Errorf("ReplicaIndex returned %d, want %d", got, tc.want) - } - }) - } -} - -func TestReplicaType(t *testing.T) { - cases := map[string]struct { - labels map[string]string - want v1.ReplicaType - wantErr bool - }{ - "new": { - labels: map[string]string{ - v1.ReplicaTypeLabel: "Foo", - }, - want: "Foo", - }, - "old": { - labels: map[string]string{ - v1.ReplicaTypeLabel: "Bar", - }, - want: "Bar", - }, - "none": { - labels: map[string]string{}, - wantErr: true, - }, - "both": { - labels: map[string]string{ - v1.ReplicaTypeLabel: "Baz", - }, - want: "Baz", - }, - } - for name, tc := range cases { - t.Run(name, func(t *testing.T) { - got, err := ReplicaType(tc.labels) - if gotErr := err != nil; tc.wantErr != gotErr { - t.Errorf("ReplicaType returned error (%t) want (%t)", gotErr, tc.wantErr) - } - if got != tc.want { - t.Errorf("ReplicaType returned %v, want %v", got, tc.want) - } - }) - } -} diff --git a/pkg/util/logger.go b/pkg/util/logger.go deleted file mode 100644 index 8d523fe1e4..0000000000 --- a/pkg/util/logger.go +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache 
License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package util - -import ( - "strings" - - log "github.com/sirupsen/logrus" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - metav1unstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" -) - -func LoggerForReplica(job metav1.Object, rtype string) *log.Entry { - return log.WithFields(log.Fields{ - // We use job to match the key used in controller.go - // Its more common in K8s to use a period to indicate namespace.name. So that's what we use. - "job": job.GetNamespace() + "." + job.GetName(), - "uid": job.GetUID(), - "replica-type": rtype, - }) -} - -func LoggerForJob(job metav1.Object) *log.Entry { - return log.WithFields(log.Fields{ - // We use job to match the key used in controller.go - // Its more common in K8s to use a period to indicate namespace.name. So that's what we use. - "job": job.GetNamespace() + "." + job.GetName(), - "uid": job.GetUID(), - }) -} - -func LoggerForPod(pod *v1.Pod, kind string) *log.Entry { - job := "" - if controllerRef := metav1.GetControllerOf(pod); controllerRef != nil { - if controllerRef.Kind == kind { - job = pod.Namespace + "." + controllerRef.Name - } - } - return log.WithFields(log.Fields{ - // We use job to match the key used in controller.go - // In controller.go we log the key used with the workqueue. - "job": job, - "pod": pod.Namespace + "." 
+ pod.Name, - "uid": pod.ObjectMeta.UID, - }) -} - -func LoggerForService(svc *v1.Service, kind string) *log.Entry { - job := "" - if controllerRef := metav1.GetControllerOf(svc); controllerRef != nil { - if controllerRef.Kind == kind { - job = svc.Namespace + "." + controllerRef.Name - } - } - return log.WithFields(log.Fields{ - // We use job to match the key used in controller.go - // In controller.go we log the key used with the workqueue. - "job": job, - "service": svc.Namespace + "." + svc.Name, - "uid": svc.ObjectMeta.UID, - }) -} - -func LoggerForKey(key string) *log.Entry { - return log.WithFields(log.Fields{ - // The key used by the workQueue should be namespace + "/" + name. - // Its more common in K8s to use a period to indicate namespace.name. So that's what we use. - "job": strings.Replace(key, "/", ".", -1), - }) -} - -func LoggerForUnstructured(obj *metav1unstructured.Unstructured, kind string) *log.Entry { - job := "" - if obj.GetKind() == kind { - job = obj.GetNamespace() + "." + obj.GetName() - } - return log.WithFields(log.Fields{ - // We use job to match the key used in controller.go - // In controller.go we log the key used with the workqueue. - "job": job, - "uid": obj.GetUID(), - }) -} diff --git a/pkg/util/signals/signal.go b/pkg/util/signals/signal.go deleted file mode 100644 index 107459405a..0000000000 --- a/pkg/util/signals/signal.go +++ /dev/null @@ -1,43 +0,0 @@ -/* -Copyright 2017 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package signals - -import ( - "os" - "os/signal" -) - -var onlyOneSignalHandler = make(chan struct{}) - -// SetupSignalHandler registered for SIGTERM and SIGINT. A stop channel is returned -// which is closed on one of these signals. If a second signal is caught, the program -// is terminated with exit code 1. -func SetupSignalHandler() (stopCh <-chan struct{}) { - close(onlyOneSignalHandler) // panics when called twice - - stop := make(chan struct{}) - c := make(chan os.Signal, 2) - signal.Notify(c, shutdownSignals...) - go func() { - <-c - close(stop) - <-c - os.Exit(1) // second signal. Exit directly. - }() - - return stop -} diff --git a/pkg/util/signals/signal_posix.go b/pkg/util/signals/signal_posix.go deleted file mode 100644 index 7e60718c78..0000000000 --- a/pkg/util/signals/signal_posix.go +++ /dev/null @@ -1,27 +0,0 @@ -//go:build !windows -// +build !windows - -/* -Copyright 2017 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package signals - -import ( - "os" - "syscall" -) - -var shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM} diff --git a/pkg/util/signals/signal_windows.go b/pkg/util/signals/signal_windows.go deleted file mode 100644 index b4d9d9272c..0000000000 --- a/pkg/util/signals/signal_windows.go +++ /dev/null @@ -1,23 +0,0 @@ -/* -Copyright 2017 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package signals - -import ( - "os" -) - -var shutdownSignals = []os.Signal{os.Interrupt} diff --git a/pkg/util/status.go b/pkg/util/status.go deleted file mode 100644 index 9a1b4eba54..0000000000 --- a/pkg/util/status.go +++ /dev/null @@ -1,149 +0,0 @@ -package util - -import ( - "fmt" - - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -const ( - // JobCreatedReason is added in a job when it is created. - JobCreatedReason = "Created" - // JobSucceededReason is added in a job when it is succeeded. - JobSucceededReason = "Succeeded" - // JobRunningReason is added in a job when it is running. - JobRunningReason = "Running" - // JobFailedReason is added in a job when it is failed. - JobFailedReason = "Failed" - // JobRestartingReason is added in a job when it is restarting. - JobRestartingReason = "Restarting" - // JobFailedValidationReason is added in a job when it failed validation - JobFailedValidationReason = "FailedValidation" - // JobSuspendedReason is added in a job when it is suspended. - JobSuspendedReason = "Suspended" - // JobResumedReason is added in a job when it is unsuspended. 
- JobResumedReason = "Resumed" -) - -func NewReason(kind, reason string) string { - return fmt.Sprintf("%s%s", kind, reason) -} - -// IsFinished checks if the job is succeeded or failed -func IsFinished(status apiv1.JobStatus) bool { - return IsSucceeded(status) || IsFailed(status) -} - -// IsSucceeded checks if the job is succeeded -func IsSucceeded(status apiv1.JobStatus) bool { - return isStatusConditionTrue(status, apiv1.JobSucceeded) -} - -// IsFailed checks if the job is failed -func IsFailed(status apiv1.JobStatus) bool { - return isStatusConditionTrue(status, apiv1.JobFailed) -} - -func IsRunning(status apiv1.JobStatus) bool { - return isStatusConditionTrue(status, apiv1.JobRunning) -} - -func IsSuspended(status apiv1.JobStatus) bool { - return isStatusConditionTrue(status, apiv1.JobSuspended) -} - -// UpdateJobConditions adds to the jobStatus a new condition if needed, with the conditionType, reason, and message -func UpdateJobConditions( - jobStatus *apiv1.JobStatus, - conditionType apiv1.JobConditionType, - conditionStatus v1.ConditionStatus, - reason, message string, -) { - condition := newCondition(conditionType, conditionStatus, reason, message) - setCondition(jobStatus, condition) -} - -func isStatusConditionTrue(status apiv1.JobStatus, condType apiv1.JobConditionType) bool { - for _, condition := range status.Conditions { - if condition.Type == condType && condition.Status == v1.ConditionTrue { - return true - } - } - return false -} - -// newCondition creates a new job condition. -func newCondition(conditionType apiv1.JobConditionType, conditionStatus v1.ConditionStatus, reason, message string) apiv1.JobCondition { - return apiv1.JobCondition{ - Type: conditionType, - Status: conditionStatus, - LastUpdateTime: metav1.Now(), - LastTransitionTime: metav1.Now(), - Reason: reason, - Message: message, - } -} - -// getCondition returns the condition with the provided type. 
-func getCondition(status apiv1.JobStatus, condType apiv1.JobConditionType) *apiv1.JobCondition { - for _, condition := range status.Conditions { - if condition.Type == condType { - return &condition - } - } - return nil -} - -// setCondition updates the job to include the provided condition. -// If the condition that we are about to add already exists -// and has the same status and reason then we are not going to update. -func setCondition(status *apiv1.JobStatus, condition apiv1.JobCondition) { - // Do nothing if JobStatus have failed condition - if IsFailed(*status) { - return - } - - currentCond := getCondition(*status, condition.Type) - - // Do nothing if condition doesn't change - if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason { - return - } - - // Do not update lastTransitionTime if the status of the condition doesn't change. - if currentCond != nil && currentCond.Status == condition.Status { - condition.LastTransitionTime = currentCond.LastTransitionTime - } - - // Append the updated condition to the conditions - newConditions := filterOutCondition(status.Conditions, condition.Type) - status.Conditions = append(newConditions, condition) -} - -// filterOutCondition returns a new slice of job conditions without conditions with the provided type. 
-func filterOutCondition(conditions []apiv1.JobCondition, condType apiv1.JobConditionType) []apiv1.JobCondition { - var newConditions []apiv1.JobCondition - for _, c := range conditions { - if condType == apiv1.JobRestarting && c.Type == apiv1.JobRunning { - continue - } - if condType == apiv1.JobRunning && c.Type == apiv1.JobRestarting { - continue - } - - if c.Type == condType { - continue - } - - // Set the running condition status to be false when current condition failed or succeeded - if (condType == apiv1.JobFailed || condType == apiv1.JobSucceeded) && c.Type == apiv1.JobRunning { - c.Status = v1.ConditionFalse - } - - newConditions = append(newConditions, c) - } - return newConditions -} diff --git a/pkg/util/status_test.go b/pkg/util/status_test.go deleted file mode 100644 index 4c483b8264..0000000000 --- a/pkg/util/status_test.go +++ /dev/null @@ -1,165 +0,0 @@ -package util - -import ( - "testing" - - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestIsFinished(t *testing.T) { - cases := map[string]struct { - jobStatus apiv1.JobStatus - want bool - }{ - "Succeeded job": { - jobStatus: apiv1.JobStatus{ - Conditions: []apiv1.JobCondition{ - { - Type: apiv1.JobSucceeded, - Status: corev1.ConditionTrue, - }, - }, - }, - want: true, - }, - "Failed job": { - jobStatus: apiv1.JobStatus{ - Conditions: []apiv1.JobCondition{ - { - Type: apiv1.JobFailed, - Status: corev1.ConditionTrue, - }, - }, - }, - want: true, - }, - "Suspended job": { - jobStatus: apiv1.JobStatus{ - Conditions: []apiv1.JobCondition{ - { - Type: apiv1.JobSuspended, - Status: corev1.ConditionTrue, - }, - }, - }, - want: false, - }, - } - for name, tc := range cases { - t.Run(name, func(t *testing.T) { - got := IsFinished(tc.jobStatus) - if tc.want != got { - t.Errorf("Unexpected result from IsFinished() \nwant: %v, got: %v\n", tc.want, got) - } - }) - } -} - -func TestIsSucceeded(t 
*testing.T) { - jobStatus := apiv1.JobStatus{ - Conditions: []apiv1.JobCondition{ - { - Type: apiv1.JobSucceeded, - Status: corev1.ConditionTrue, - }, - }, - } - assert.True(t, IsSucceeded(jobStatus)) -} - -func TestIsFailed(t *testing.T) { - jobStatus := apiv1.JobStatus{ - Conditions: []apiv1.JobCondition{ - { - Type: apiv1.JobFailed, - Status: corev1.ConditionTrue, - }, - }, - } - assert.True(t, IsFailed(jobStatus)) -} - -func TestIsRunning(t *testing.T) { - jobStatus := apiv1.JobStatus{ - Conditions: []apiv1.JobCondition{ - { - Type: apiv1.JobRunning, - Status: corev1.ConditionTrue, - }, - }, - } - assert.True(t, IsRunning(jobStatus)) -} - -func TestIsSuspended(t *testing.T) { - jobStatus := apiv1.JobStatus{ - Conditions: []apiv1.JobCondition{ - { - Type: apiv1.JobSuspended, - Status: corev1.ConditionTrue, - }, - }, - } - assert.True(t, IsSuspended(jobStatus)) -} - -func TestUpdateJobConditions(t *testing.T) { - jobStatus := apiv1.JobStatus{} - conditionType := apiv1.JobCreated - reason := "Job Created" - message := "Job Created" - - UpdateJobConditions(&jobStatus, conditionType, corev1.ConditionTrue, reason, message) - // Check JobCreated condition is appended - conditionInStatus := jobStatus.Conditions[0] - assert.Equal(t, conditionInStatus.Type, conditionType) - assert.Equal(t, conditionInStatus.Reason, reason) - assert.Equal(t, conditionInStatus.Message, message) - - conditionType = apiv1.JobRunning - reason = "Job Running" - message = "Job Running" - UpdateJobConditions(&jobStatus, conditionType, corev1.ConditionTrue, reason, message) - // Check JobRunning condition is appended - conditionInStatus = jobStatus.Conditions[1] - assert.Equal(t, conditionInStatus.Type, conditionType) - assert.Equal(t, conditionInStatus.Reason, reason) - assert.Equal(t, conditionInStatus.Message, message) - - conditionType = apiv1.JobRestarting - reason = "Job Restarting" - message = "Job Restarting" - UpdateJobConditions(&jobStatus, conditionType, corev1.ConditionTrue, reason, 
message) - // Check JobRunning condition is filtered out and JobRestarting state is appended - conditionInStatus = jobStatus.Conditions[1] - assert.Equal(t, conditionInStatus.Type, conditionType) - assert.Equal(t, conditionInStatus.Reason, reason) - assert.Equal(t, conditionInStatus.Message, message) - - conditionType = apiv1.JobRunning - reason = "Job Running" - message = "Job Running" - UpdateJobConditions(&jobStatus, conditionType, corev1.ConditionTrue, reason, message) - // Again, Check JobRestarting condition is filtered and JobRestarting is appended - conditionInStatus = jobStatus.Conditions[1] - assert.Equal(t, conditionInStatus.Type, conditionType) - assert.Equal(t, conditionInStatus.Reason, reason) - assert.Equal(t, conditionInStatus.Message, message) - - conditionType = apiv1.JobFailed - reason = "Job Failed" - message = "Job Failed" - UpdateJobConditions(&jobStatus, conditionType, corev1.ConditionTrue, reason, message) - // Check JobRunning condition is set to false - jobRunningCondition := jobStatus.Conditions[1] - assert.Equal(t, jobRunningCondition.Type, apiv1.JobRunning) - assert.Equal(t, jobRunningCondition.Status, corev1.ConditionFalse) - // Check JobFailed state is appended - conditionInStatus = jobStatus.Conditions[2] - assert.Equal(t, conditionInStatus.Type, conditionType) - assert.Equal(t, conditionInStatus.Reason, reason) - assert.Equal(t, conditionInStatus.Message, message) -} diff --git a/pkg/util/testutil/constants.go b/pkg/util/testutil/constants.go deleted file mode 100644 index 74e0a11796..0000000000 --- a/pkg/util/testutil/constants.go +++ /dev/null @@ -1,19 +0,0 @@ -package testutil - -import ( - "time" - - "github.com/google/go-cmp/cmp/cmpopts" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -const ( - Timeout = 30 * time.Second - Interval = 250 * time.Millisecond - ConsistentDuration = 3 * time.Second -) - -var ( - IgnoreJobConditionsTimes = cmpopts.IgnoreFields(kubeflowv1.JobCondition{}, 
"LastUpdateTime", "LastTransitionTime") -) diff --git a/pkg/util/train/train_util.go b/pkg/util/train/train_util.go deleted file mode 100644 index c6ca98b87b..0000000000 --- a/pkg/util/train/train_util.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package that various helper routines for training. -package train - -import ( - "k8s.io/utils/ptr" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func IsRetryableExitCode(exitCode int32) bool { - return exitCode >= 128 -} - -func IsJobSuspended(runPolicy *kubeflowv1.RunPolicy) bool { - return runPolicy != nil && ptr.Deref(runPolicy.Suspend, false) -} diff --git a/pkg/util/train/train_util_test.go b/pkg/util/train/train_util_test.go deleted file mode 100644 index 9e6f3c8b27..0000000000 --- a/pkg/util/train/train_util_test.go +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package train - -import ( - "testing" - - "k8s.io/utils/ptr" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestIsRetryableExitCode(t *testing.T) { - tcs := []struct { - ExitCode int32 - Expected bool - }{ - { - ExitCode: 1, - Expected: false, - }, - { - ExitCode: 2, - Expected: false, - }, - { - ExitCode: 3, - Expected: false, - }, - { - ExitCode: 130, - Expected: true, - }, - { - ExitCode: 138, - Expected: true, - }, - } - - for _, tc := range tcs { - actual := IsRetryableExitCode(tc.ExitCode) - if actual != tc.Expected { - t.Errorf("ExitCode %d: Expected %t, got %t", tc.ExitCode, tc.Expected, actual) - } - } -} - -func TestIsJobSuspended(t *testing.T) { - cases := map[string]struct { - runPolicy *kubeflowv1.RunPolicy - want bool - }{ - "runPolicy is nil": { - runPolicy: nil, - want: false, - }, - "suspend is nil": { - runPolicy: &kubeflowv1.RunPolicy{ - Suspend: nil, - }, - want: false, - }, - "suspend is false": { - runPolicy: &kubeflowv1.RunPolicy{ - Suspend: ptr.To(false), - }, - want: false, - }, - "suspend is true": { - runPolicy: &kubeflowv1.RunPolicy{ - Suspend: ptr.To(true), - }, - want: true, - }, - } - for name, tc := range cases { - t.Run(name, func(t *testing.T) { - got := IsJobSuspended(tc.runPolicy) - if tc.want != got { - t.Errorf("Unexpected suspended from IsJobSuspended \nwant: %v\n, \ngot: %v\n", tc.want, got) - } - }) - } -} diff --git a/pkg/webhook.v2/clustertrainingruntime_webhook.go b/pkg/webhooks.v2/clustertrainingruntime_webhook.go similarity index 99% rename from pkg/webhook.v2/clustertrainingruntime_webhook.go rename to pkg/webhooks.v2/clustertrainingruntime_webhook.go index c98d71a15b..47b2bd3392 100644 --- a/pkg/webhook.v2/clustertrainingruntime_webhook.go +++ b/pkg/webhooks.v2/clustertrainingruntime_webhook.go @@ -14,7 +14,7 @@ See the License for the specific language governing 
permissions and limitations under the License. */ -package webhookv2 +package webhooksv2 import ( "context" diff --git a/pkg/webhook.v2/setup.go b/pkg/webhooks.v2/setup.go similarity index 98% rename from pkg/webhook.v2/setup.go rename to pkg/webhooks.v2/setup.go index 6e7c7f290e..bd10cdf53e 100644 --- a/pkg/webhook.v2/setup.go +++ b/pkg/webhooks.v2/setup.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package webhookv2 +package webhooksv2 import ( ctrl "sigs.k8s.io/controller-runtime" diff --git a/pkg/webhook.v2/trainingruntime_webhook.go b/pkg/webhooks.v2/trainingruntime_webhook.go similarity index 99% rename from pkg/webhook.v2/trainingruntime_webhook.go rename to pkg/webhooks.v2/trainingruntime_webhook.go index fa6a7186db..8196e8ca3e 100644 --- a/pkg/webhook.v2/trainingruntime_webhook.go +++ b/pkg/webhooks.v2/trainingruntime_webhook.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package webhookv2 +package webhooksv2 import ( "context" diff --git a/pkg/webhook.v2/trainingruntime_webhook_test.go b/pkg/webhooks.v2/trainingruntime_webhook_test.go similarity index 99% rename from pkg/webhook.v2/trainingruntime_webhook_test.go rename to pkg/webhooks.v2/trainingruntime_webhook_test.go index fbbf9a6a35..b54f322985 100644 --- a/pkg/webhook.v2/trainingruntime_webhook_test.go +++ b/pkg/webhooks.v2/trainingruntime_webhook_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package webhookv2 +package webhooksv2 import ( "testing" diff --git a/pkg/webhook.v2/trainjob_webhook.go b/pkg/webhooks.v2/trainjob_webhook.go similarity index 99% rename from pkg/webhook.v2/trainjob_webhook.go rename to pkg/webhooks.v2/trainjob_webhook.go index cf75400c82..bf8a594cc5 100644 --- a/pkg/webhook.v2/trainjob_webhook.go +++ b/pkg/webhooks.v2/trainjob_webhook.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package webhookv2 +package webhooksv2 import ( "context" diff --git a/pkg/webhooks/jax/jaxjob_webhook.go b/pkg/webhooks/jax/jaxjob_webhook.go deleted file mode 100644 index 12888b3d3c..0000000000 --- a/pkg/webhooks/jax/jaxjob_webhook.go +++ /dev/null @@ -1,124 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package jax - -import ( - "context" - "fmt" - "slices" - "strings" - - apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/klog/v2" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -var ( - specPath = field.NewPath("spec") - jaxReplicaSpecPath = specPath.Child("jaxReplicaSpecs") -) - -type Webhook struct{} - -func SetupWebhook(mgr ctrl.Manager) error { - return ctrl.NewWebhookManagedBy(mgr). - For(&trainingoperator.JAXJob{}). - WithValidator(&Webhook{}). - Complete() -} - -// +kubebuilder:webhook:path=/validate-kubeflow-org-v1-jaxjob,mutating=false,failurePolicy=fail,sideEffects=None,groups=kubeflow.org,resources=jaxjobs,verbs=create;update,versions=v1,name=validator.jaxjob.training-operator.kubeflow.org,admissionReviewVersions=v1 - -var _ webhook.CustomValidator = &Webhook{} - -func (w *Webhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { - job := obj.(*trainingoperator.JAXJob) - log := ctrl.LoggerFrom(ctx).WithName("jaxjob-webhook") - log.V(5).Info("Validating create", "jaxJob", klog.KObj(job)) - return nil, validateJAXJob(job).ToAggregate() -} - -func (w *Webhook) ValidateUpdate(ctx context.Context, _ runtime.Object, newObj runtime.Object) (admission.Warnings, error) { - job := newObj.(*trainingoperator.JAXJob) - log := ctrl.LoggerFrom(ctx).WithName("jaxjob-webhook") - log.V(5).Info("Validating update", "jaxJob", klog.KObj(job)) - return nil, validateJAXJob(job).ToAggregate() -} - -func (w *Webhook) ValidateDelete(context.Context, runtime.Object) (admission.Warnings, error) { - return nil, nil -} - -func validateJAXJob(job *trainingoperator.JAXJob) field.ErrorList { - var allErrs field.ErrorList - if errors := 
apimachineryvalidation.NameIsDNS1035Label(job.ObjectMeta.Name, false); len(errors) != 0 { - allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), job.Name, fmt.Sprintf("should match: %v", strings.Join(errors, ",")))) - } - - allErrs = append(allErrs, validateSpec(job.Spec)...) - return allErrs -} - -func validateSpec(spec trainingoperator.JAXJobSpec) field.ErrorList { - return validateJAXReplicaSpecs(spec.JAXReplicaSpecs) -} - -func validateJAXReplicaSpecs(rSpecs map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec) field.ErrorList { - var allErrs field.ErrorList - - if rSpecs == nil { - allErrs = append(allErrs, field.Required(jaxReplicaSpecPath, "must be required")) - } - for rType, rSpec := range rSpecs { - rolePath := jaxReplicaSpecPath.Key(string(rType)) - containersPath := rolePath.Child("template").Child("spec").Child("containers") - - // Make sure the replica type is valid. - validRoleTypes := []trainingoperator.ReplicaType{ - trainingoperator.JAXJobReplicaTypeWorker, - } - if !slices.Contains(validRoleTypes, rType) { - allErrs = append(allErrs, field.NotSupported(rolePath, rType, validRoleTypes)) - } - - if rSpec == nil || len(rSpec.Template.Spec.Containers) == 0 { - allErrs = append(allErrs, field.Required(containersPath, "must be specified")) - } - - // Make sure the image is defined in the container - defaultContainerPresent := false - for idx, container := range rSpec.Template.Spec.Containers { - if container.Image == "" { - allErrs = append(allErrs, field.Required(containersPath.Index(idx).Child("image"), "must be required")) - } - if container.Name == trainingoperator.JAXJobDefaultContainerName { - defaultContainerPresent = true - } - } - // Make sure there has at least one container named "jax" - if !defaultContainerPresent { - allErrs = append(allErrs, field.Required(containersPath, fmt.Sprintf("must have at least one container with name %s", trainingoperator.JAXJobDefaultContainerName))) - } - } - return 
allErrs -} diff --git a/pkg/webhooks/jax/jaxjob_webhook_test.go b/pkg/webhooks/jax/jaxjob_webhook_test.go deleted file mode 100644 index a6463fb3aa..0000000000 --- a/pkg/webhooks/jax/jaxjob_webhook_test.go +++ /dev/null @@ -1,198 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package jax - -import ( - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/utils/ptr" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestValidateV1JAXJob(t *testing.T) { - validJAXReplicaSpecs := map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.JAXJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - RestartPolicy: trainingoperator.RestartPolicyOnFailure, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "jax", - Image: "docker.io/kubeflow/jaxjob-simple:latest", - Ports: []corev1.ContainerPort{{ - Name: "jaxjob-port", - ContainerPort: 6666, - }}, - ImagePullPolicy: corev1.PullAlways, - Command: []string{ - "python", - "train.py", - }, - }}, - }, - }, - }, - } - - testCases := map[string]struct { - jaxJob *trainingoperator.JAXJob - wantErr field.ErrorList - }{ - "valid JAXJob": { - jaxJob: &trainingoperator.JAXJob{ - ObjectMeta: metav1.ObjectMeta{ - 
Name: "test", - }, - Spec: trainingoperator.JAXJobSpec{ - JAXReplicaSpecs: validJAXReplicaSpecs, - }, - }, - }, - "jaxJob name does not meet DNS1035": { - jaxJob: &trainingoperator.JAXJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "0-test", - }, - Spec: trainingoperator.JAXJobSpec{ - JAXReplicaSpecs: validJAXReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("metadata").Child("name"), "", ""), - }, - }, - "no containers": { - jaxJob: &trainingoperator.JAXJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.JAXJobSpec{ - JAXReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.JAXJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(jaxReplicaSpecPath. - Key(string(trainingoperator.JAXJobReplicaTypeWorker)). - Child("template"). - Child("spec"). - Child("containers"), ""), - field.Required(jaxReplicaSpecPath. - Key(string(trainingoperator.JAXJobReplicaTypeWorker)). - Child("template"). - Child("spec"). - Child("containers"), ""), - }, - }, - "image is empty": { - jaxJob: &trainingoperator.JAXJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.JAXJobSpec{ - JAXReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.JAXJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "jax", - Image: "", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(jaxReplicaSpecPath. - Key(string(trainingoperator.JAXJobReplicaTypeWorker)). - Child("template"). - Child("spec"). - Child("containers"). - Index(0). 
- Child("image"), ""), - }, - }, - "jaxJob default container name doesn't present": { - jaxJob: &trainingoperator.JAXJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.JAXJobSpec{ - JAXReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.JAXJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "", - Image: "gcr.io/kubeflow-ci/jaxjob-dist-spmd-mnist_test:1.0", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(jaxReplicaSpecPath. - Key(string(trainingoperator.JAXJobReplicaTypeWorker)). - Child("template"). - Child("spec"). - Child("containers"), ""), - }, - }, - "replicaSpec is nil": { - jaxJob: &trainingoperator.JAXJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.JAXJobSpec{ - JAXReplicaSpecs: nil, - }, - }, - wantErr: field.ErrorList{ - field.Required(jaxReplicaSpecPath, ""), - }, - }, - } - for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - got := validateJAXJob(tc.jaxJob) - if diff := cmp.Diff(tc.wantErr, got, cmpopts.IgnoreFields(field.Error{}, "Detail", "BadValue")); len(diff) != 0 { - t.Errorf("Unexpected error (-want,+got):\n%s", diff) - } - }) - } -} diff --git a/pkg/webhooks/paddlepaddle/paddlepaddle_webhook.go b/pkg/webhooks/paddlepaddle/paddlepaddle_webhook.go deleted file mode 100644 index fedc95b5f7..0000000000 --- a/pkg/webhooks/paddlepaddle/paddlepaddle_webhook.go +++ /dev/null @@ -1,126 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package paddlepaddle - -import ( - "context" - "fmt" - "slices" - "strings" - - apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/klog/v2" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/common/util" -) - -var ( - specPath = field.NewPath("spec") - paddleReplicaSpecPath = specPath.Child("paddleReplicaSpecs") -) - -type Webhook struct{} - -func SetupWebhook(mgr ctrl.Manager) error { - return ctrl.NewWebhookManagedBy(mgr). - For(&trainingoperator.PaddleJob{}). - WithValidator(&Webhook{}). 
- Complete() -} - -// +kubebuilder:webhook:path=/validate-kubeflow-org-v1-paddlejob,mutating=false,failurePolicy=fail,sideEffects=None,groups=kubeflow.org,resources=paddlejobs,verbs=create;update,versions=v1,name=validator.paddlejob.training-operator.kubeflow.org,admissionReviewVersions=v1 - -var _ webhook.CustomValidator = &Webhook{} - -func (w Webhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { - job := obj.(*trainingoperator.PaddleJob) - log := ctrl.LoggerFrom(ctx).WithName("paddlejob-webhook") - log.V(5).Info("Validating create", "paddleJob", klog.KObj(job)) - return nil, validatePaddleJob(nil, job).ToAggregate() -} - -func (w Webhook) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { - oldJob := oldObj.(*trainingoperator.PaddleJob) - newJob := newObj.(*trainingoperator.PaddleJob) - log := ctrl.LoggerFrom(ctx).WithName("paddlejob-webhook") - log.V(5).Info("Validating update", "paddleJob", klog.KObj(newJob)) - return nil, validatePaddleJob(oldJob, newJob).ToAggregate() -} - -func (w Webhook) ValidateDelete(context.Context, runtime.Object) (admission.Warnings, error) { - return nil, nil -} - -func validatePaddleJob(oldJob, newJob *trainingoperator.PaddleJob) field.ErrorList { - var allErrs field.ErrorList - if errors := apimachineryvalidation.NameIsDNS1035Label(newJob.Name, false); len(errors) != 0 { - allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), newJob.Name, fmt.Sprintf("should match: %v", strings.Join(errors, ",")))) - } - if oldJob != nil { - allErrs = append(allErrs, util.ValidateRunPolicyUpdate(&oldJob.Spec.RunPolicy, &newJob.Spec.RunPolicy)...) - } - allErrs = append(allErrs, util.ValidateRunPolicy(&newJob.Spec.RunPolicy)...) - allErrs = append(allErrs, validateSpec(newJob.Spec.PaddleReplicaSpecs)...) 
- return allErrs -} - -func validateSpec(rSpecs map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec) field.ErrorList { - var allErrs field.ErrorList - - if rSpecs == nil { - allErrs = append(allErrs, field.Required(paddleReplicaSpecPath, "must be required")) - } - for rType, rSpec := range rSpecs { - rolePath := paddleReplicaSpecPath.Key(string(rType)) - containersPath := rolePath.Child("template").Child("spec").Child("containers") - - // Make sure the replica type is valid. - validReplicaTypes := []trainingoperator.ReplicaType{ - trainingoperator.PaddleJobReplicaTypeMaster, - trainingoperator.PaddleJobReplicaTypeWorker, - } - if !slices.Contains(validReplicaTypes, rType) { - allErrs = append(allErrs, field.NotSupported(rolePath, rType, validReplicaTypes)) - } - - if rSpec == nil || len(rSpec.Template.Spec.Containers) == 0 { - allErrs = append(allErrs, field.Required(containersPath, "must be specified")) - } - - // Make sure the image is defined in the container - defaultContainerPresent := false - for idx, container := range rSpec.Template.Spec.Containers { - if container.Image == "" { - allErrs = append(allErrs, field.Required(containersPath.Index(idx).Child("image"), "must be required")) - } - if container.Name == trainingoperator.PaddleJobDefaultContainerName { - defaultContainerPresent = true - } - } - // Make sure there has at least one container named "paddle" - if !defaultContainerPresent { - allErrs = append(allErrs, field.Required(containersPath, fmt.Sprintf("must have at least one container with name %q", trainingoperator.PaddleJobDefaultContainerName))) - } - } - return allErrs -} diff --git a/pkg/webhooks/paddlepaddle/paddlepaddle_webhook_test.go b/pkg/webhooks/paddlepaddle/paddlepaddle_webhook_test.go deleted file mode 100644 index 10cc0ea053..0000000000 --- a/pkg/webhooks/paddlepaddle/paddlepaddle_webhook_test.go +++ /dev/null @@ -1,203 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package paddlepaddle - -import ( - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/utils/ptr" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestValidateV1PaddleJob(t *testing.T) { - validPaddleReplicaSpecs := map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PaddleJobReplicaTypeWorker: { - Replicas: ptr.To[int32](2), - RestartPolicy: trainingoperator.RestartPolicyNever, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "paddle", - Image: "registry.baidubce.com/paddlepaddle/paddle:2.4.0rc0-cpu", - Command: []string{"python"}, - Args: []string{ - "-m", - "paddle.distributed.launch", - "run_check", - }, - Ports: []corev1.ContainerPort{{ - Name: "master", - ContainerPort: int32(37777), - }}, - ImagePullPolicy: corev1.PullAlways, - }}, - }, - }, - }, - } - testCases := map[string]struct { - paddleJob *trainingoperator.PaddleJob - wantErr field.ErrorList - }{ - "valid paddleJob": { - paddleJob: &trainingoperator.PaddleJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PaddleJobSpec{ - RunPolicy: trainingoperator.RunPolicy{ - ManagedBy: 
ptr.To(trainingoperator.KubeflowJobsController), - }, - PaddleReplicaSpecs: validPaddleReplicaSpecs, - }, - }, - }, - "paddleJob name does not meet DNS1035": { - paddleJob: &trainingoperator.PaddleJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "__test", - }, - Spec: trainingoperator.PaddleJobSpec{ - PaddleReplicaSpecs: validPaddleReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("metadata").Child("name"), "", ""), - }, - }, - "no containers": { - paddleJob: &trainingoperator.PaddleJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PaddleJobSpec{ - PaddleReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PaddleJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(paddleReplicaSpecPath.Key(string(trainingoperator.PaddleJobReplicaTypeWorker)).Child("template").Child("spec").Child("containers"), ""), - field.Required(paddleReplicaSpecPath.Key(string(trainingoperator.PaddleJobReplicaTypeWorker)).Child("template").Child("spec").Child("containers"), ""), - }, - }, - "image is empty": { - paddleJob: &trainingoperator.PaddleJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PaddleJobSpec{ - PaddleReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PaddleJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "paddle", - Image: "", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(paddleReplicaSpecPath.Key(string(trainingoperator.PaddleJobReplicaTypeWorker)).Child("template").Child("spec").Child("containers").Index(0).Child("image"), ""), - }, - }, - "paddle default container name doesn't find": { - paddleJob: &trainingoperator.PaddleJob{ - 
ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PaddleJobSpec{ - PaddleReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PaddleJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "", - Image: "gcr.io/kubeflow-ci/paddle-dist-mnist_test:1.0", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(paddleReplicaSpecPath.Key(string(trainingoperator.PaddleJobReplicaTypeWorker)).Child("template").Child("spec").Child("containers"), ""), - }, - }, - "replicaSpec is nil": { - paddleJob: &trainingoperator.PaddleJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PaddleJobSpec{ - PaddleReplicaSpecs: nil, - }, - }, - wantErr: field.ErrorList{ - field.Required(paddleReplicaSpecPath, ""), - }, - }, - "attempt to set unsupported managedBy controller name gets rejected": { - paddleJob: &trainingoperator.PaddleJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PaddleJobSpec{ - RunPolicy: trainingoperator.RunPolicy{ - ManagedBy: ptr.To("other-job-controller"), - }, - PaddleReplicaSpecs: validPaddleReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.NotSupported(field.NewPath("spec", "runPolicy", "managedBy"), "", sets.List(sets.New( - trainingoperator.MultiKueueController, - trainingoperator.KubeflowJobsController))), - }, - }, - } - for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - got := validatePaddleJob(nil, tc.paddleJob) - if diff := cmp.Diff(tc.wantErr, got, cmpopts.IgnoreFields(field.Error{}, "Detail", "BadValue")); len(diff) != 0 { - t.Errorf("Unexpected error (-want,+got):\n%s", diff) - } - }) - } -} diff --git a/pkg/webhooks/pytorch/pytorchjob_webhook.go b/pkg/webhooks/pytorch/pytorchjob_webhook.go deleted file mode 100644 index bc2d9fc527..0000000000 --- a/pkg/webhooks/pytorch/pytorchjob_webhook.go 
+++ /dev/null @@ -1,161 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package pytorch - -import ( - "context" - "fmt" - "slices" - "strings" - - apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/klog/v2" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/common/util" -) - -var ( - specPath = field.NewPath("spec") - pytorchReplicaSpecPath = specPath.Child("pytorchReplicaSpecs") -) - -type Webhook struct{} - -func SetupWebhook(mgr ctrl.Manager) error { - return ctrl.NewWebhookManagedBy(mgr). - For(&trainingoperator.PyTorchJob{}). - WithValidator(&Webhook{}). 
- Complete() -} - -// +kubebuilder:webhook:path=/validate-kubeflow-org-v1-pytorchjob,mutating=false,failurePolicy=fail,sideEffects=None,groups=kubeflow.org,resources=pytorchjobs,verbs=create;update,versions=v1,name=validator.pytorchjob.training-operator.kubeflow.org,admissionReviewVersions=v1 - -var _ webhook.CustomValidator = &Webhook{} - -func (w *Webhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { - job := obj.(*trainingoperator.PyTorchJob) - log := ctrl.LoggerFrom(ctx).WithName("pytorchjob-webhook") - log.V(5).Info("Validating create", "pytorchJob", klog.KObj(job)) - warnings, errs := validatePyTorchJob(nil, job) - return warnings, errs.ToAggregate() -} - -func (w *Webhook) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { - oldJob := newObj.(*trainingoperator.PyTorchJob) - newJob := newObj.(*trainingoperator.PyTorchJob) - log := ctrl.LoggerFrom(ctx).WithName("pytorchjob-webhook") - log.V(5).Info("Validating update", "pytorchJob", klog.KObj(newJob)) - warnings, errs := validatePyTorchJob(oldJob, newJob) - return warnings, errs.ToAggregate() -} - -func (w *Webhook) ValidateDelete(context.Context, runtime.Object) (admission.Warnings, error) { - return nil, nil -} - -func validatePyTorchJob(oldJob, newJob *trainingoperator.PyTorchJob) (admission.Warnings, field.ErrorList) { - var allErrs field.ErrorList - var warnings admission.Warnings - - if errors := apimachineryvalidation.NameIsDNS1035Label(newJob.ObjectMeta.Name, false); len(errors) != 0 { - allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), newJob.Name, fmt.Sprintf("should match: %v", strings.Join(errors, ",")))) - } - if oldJob != nil { - allErrs = append(allErrs, util.ValidateRunPolicyUpdate(&oldJob.Spec.RunPolicy, &newJob.Spec.RunPolicy)...) - } - allErrs = append(allErrs, util.ValidateRunPolicy(&newJob.Spec.RunPolicy)...) 
- ws, err := validateSpec(newJob.Spec) - warnings = append(warnings, ws...) - allErrs = append(allErrs, err...) - return warnings, allErrs -} - -func validateSpec(spec trainingoperator.PyTorchJobSpec) (admission.Warnings, field.ErrorList) { - var allErrs field.ErrorList - var warnings admission.Warnings - if spec.ElasticPolicy != nil { - _, ok := spec.PyTorchReplicaSpecs[trainingoperator.PyTorchJobReplicaTypeWorker] - workerPath := pytorchReplicaSpecPath.Key(string(trainingoperator.PyTorchJobReplicaTypeWorker)) - if !ok { - allErrs = append(allErrs, field.Required(workerPath, "must be configured if elastic policy is used")) - } - if spec.ElasticPolicy.NProcPerNode != nil { - elasticNProcPerNodePath := specPath.Child("elasticPolicy").Child("nProcPerNode") - nprocPerNodePath := specPath.Child("nprocPerNode") - warnings = append(warnings, fmt.Sprintf("%s is deprecated, use %s instead", elasticNProcPerNodePath.String(), nprocPerNodePath.String())) - if spec.NprocPerNode != nil { - allErrs = append(allErrs, field.Forbidden(elasticNProcPerNodePath, fmt.Sprintf("must not be used with %s", nprocPerNodePath))) - } - } - } - allErrs = append(allErrs, validatePyTorchReplicaSpecs(spec.PyTorchReplicaSpecs)...) - return warnings, allErrs -} - -func validatePyTorchReplicaSpecs(rSpecs map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec) field.ErrorList { - var allErrs field.ErrorList - - if rSpecs == nil { - allErrs = append(allErrs, field.Required(pytorchReplicaSpecPath, "must be required")) - } - for rType, rSpec := range rSpecs { - rolePath := pytorchReplicaSpecPath.Key(string(rType)) - containersPath := rolePath.Child("template").Child("spec").Child("containers") - - // Make sure the replica type is valid. 
- validRoleTypes := []trainingoperator.ReplicaType{ - trainingoperator.PyTorchJobReplicaTypeMaster, - trainingoperator.PyTorchJobReplicaTypeWorker, - } - if !slices.Contains(validRoleTypes, rType) { - allErrs = append(allErrs, field.NotSupported(rolePath, rType, validRoleTypes)) - } - - if rSpec == nil || len(rSpec.Template.Spec.Containers) == 0 { - allErrs = append(allErrs, field.Required(containersPath, "must be specified")) - } - - // Make sure the image is defined in the container - defaultContainerPresent := false - for idx, container := range rSpec.Template.Spec.Containers { - if container.Image == "" { - allErrs = append(allErrs, field.Required(containersPath.Index(idx).Child("image"), "must be required")) - } - if container.Name == trainingoperator.PyTorchJobDefaultContainerName { - defaultContainerPresent = true - } - } - // Make sure there has at least one container named "pytorch" - if !defaultContainerPresent { - allErrs = append(allErrs, field.Required(containersPath, fmt.Sprintf("must have at least one container with name %s", trainingoperator.PyTorchJobDefaultContainerName))) - } - if rType == trainingoperator.PyTorchJobReplicaTypeMaster { - if rSpec.Replicas == nil || int(*rSpec.Replicas) != 1 { - allErrs = append(allErrs, field.Forbidden(rolePath.Child("replicas"), "must be 1")) - } - } else if rSpec.Replicas != nil && int(*rSpec.Replicas) < 1 { - allErrs = append(allErrs, field.Forbidden(rolePath.Child("replicas"), "must be at least 1")) - } - } - return allErrs -} diff --git a/pkg/webhooks/pytorch/pytorchjob_webhook_test.go b/pkg/webhooks/pytorch/pytorchjob_webhook_test.go deleted file mode 100644 index e1b7d3f69e..0000000000 --- a/pkg/webhooks/pytorch/pytorchjob_webhook_test.go +++ /dev/null @@ -1,436 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package pytorch - -import ( - "fmt" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - corev1 "k8s.io/api/core/v1" - apivalidation "k8s.io/apimachinery/pkg/api/validation" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestValidateV1PyTorchJob(t *testing.T) { - validPyTorchReplicaSpecs := map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PyTorchJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - RestartPolicy: trainingoperator.RestartPolicyOnFailure, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "pytorch", - Image: "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727", - ImagePullPolicy: corev1.PullAlways, - Command: []string{ - "python3", - "/opt/pytorch-mnist/mnist.py", - "--epochs=1", - }, - }}, - }, - }, - }, - trainingoperator.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - RestartPolicy: trainingoperator.RestartPolicyOnFailure, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "pytorch", - Image: "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727", - ImagePullPolicy: corev1.PullAlways, - Command: []string{ - "python3", - "/opt/pytorch-mnist/mnist.py", - "--epochs=1", - }, - }}, - }, - }, - }, - } 
- - testCases := map[string]struct { - pytorchJob *trainingoperator.PyTorchJob - oldPytorchJob *trainingoperator.PyTorchJob - wantErr field.ErrorList - wantWarnings admission.Warnings - }{ - "valid PyTorchJob": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - RunPolicy: trainingoperator.RunPolicy{ - ManagedBy: ptr.To(trainingoperator.KubeflowJobsController), - }, - ElasticPolicy: &trainingoperator.ElasticPolicy{ - RDZVBackend: ptr.To(trainingoperator.BackendC10D), - }, - PyTorchReplicaSpecs: validPyTorchReplicaSpecs, - }, - }, - }, - "pytorchJob name does not meet DNS1035": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "0-test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - PyTorchReplicaSpecs: validPyTorchReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("metadata").Child("name"), "", ""), - }, - }, - "no containers": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - PyTorchReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PyTorchJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(pytorchReplicaSpecPath. - Key(string(trainingoperator.PyTorchJobReplicaTypeWorker)). - Child("template"). - Child("spec"). - Child("containers"), ""), - field.Required(pytorchReplicaSpecPath. - Key(string(trainingoperator.PyTorchJobReplicaTypeWorker)). - Child("template"). - Child("spec"). 
- Child("containers"), ""), - }, - }, - "image is empty": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - PyTorchReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PyTorchJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pytorch", - Image: "", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(pytorchReplicaSpecPath. - Key(string(trainingoperator.PyTorchJobReplicaTypeWorker)). - Child("template"). - Child("spec"). - Child("containers"). - Index(0). - Child("image"), ""), - }, - }, - "pytorchJob default container name doesn't present": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - PyTorchReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PyTorchJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "", - Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(pytorchReplicaSpecPath. - Key(string(trainingoperator.PyTorchJobReplicaTypeWorker)). - Child("template"). - Child("spec"). 
- Child("containers"), ""), - }, - }, - "the number of replicas in masterReplica is other than 1": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - PyTorchReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PyTorchJobReplicaTypeMaster: { - Replicas: ptr.To[int32](2), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pytorch", - Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Forbidden(pytorchReplicaSpecPath.Key(string(trainingoperator.PyTorchJobReplicaTypeMaster)).Child("replicas"), ""), - }, - }, - "Spec.ElasticPolicy.NProcPerNode are set": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - ElasticPolicy: &trainingoperator.ElasticPolicy{ - NProcPerNode: ptr.To[int32](1), - }, - PyTorchReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PyTorchJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pytorch", - Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", - }, - }, - }, - }, - }, - trainingoperator.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pytorch", - Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", - }, - }, - }, - }, - }, - }, - }, - }, - wantWarnings: admission.Warnings{ - fmt.Sprintf("%s is deprecated, use %s instead", - specPath.Child("elasticPolicy").Child("nProcPerNode"), specPath.Child("nprocPerNode")), - }, - }, - "Spec.NprocPerNode and Spec.ElasticPolicy.NProcPerNode are set": { - pytorchJob: 
&trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - NprocPerNode: ptr.To("1"), - ElasticPolicy: &trainingoperator.ElasticPolicy{ - NProcPerNode: ptr.To[int32](1), - }, - PyTorchReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PyTorchJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pytorch", - Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", - }, - }, - }, - }, - }, - trainingoperator.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pytorch", - Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Forbidden(specPath.Child("elasticPolicy").Child("nProcPerNode"), ""), - }, - wantWarnings: admission.Warnings{ - fmt.Sprintf("%s is deprecated, use %s instead", - specPath.Child("elasticPolicy").Child("nProcPerNode"), specPath.Child("nprocPerNode")), - }, - }, - "attempt to set unsupported managedBy controller name gets rejected": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - RunPolicy: trainingoperator.RunPolicy{ - ManagedBy: ptr.To("other-job-controller"), - }, - PyTorchReplicaSpecs: validPyTorchReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.NotSupported(field.NewPath("spec", "runPolicy", "managedBy"), "", sets.List(sets.New( - trainingoperator.MultiKueueController, - trainingoperator.KubeflowJobsController))), - }, - }, - "attempt to update the managedBy field gets rejected": { - oldPytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - RunPolicy: 
trainingoperator.RunPolicy{ - ManagedBy: ptr.To(trainingoperator.KubeflowJobsController), - }, - PyTorchReplicaSpecs: validPyTorchReplicaSpecs, - }, - }, - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - RunPolicy: trainingoperator.RunPolicy{ - ManagedBy: ptr.To(trainingoperator.MultiKueueController), - }, - PyTorchReplicaSpecs: validPyTorchReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("spec", "runPolicy", "managedBy"), trainingoperator.MultiKueueController, apivalidation.FieldImmutableErrorMsg), - }, - }, - "attempt to configure elasticPolicy when no worker is configured": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - ElasticPolicy: &trainingoperator.ElasticPolicy{}, - PyTorchReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PyTorchJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pytorch", - Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(pytorchReplicaSpecPath.Key(string(trainingoperator.PyTorchJobReplicaTypeWorker)), ""), - }, - }, - "attempt to configure worker with 0 replicas": { - pytorchJob: &trainingoperator.PyTorchJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.PyTorchJobSpec{ - ElasticPolicy: &trainingoperator.ElasticPolicy{}, - PyTorchReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.PyTorchJobReplicaTypeWorker: { - Replicas: ptr.To[int32](0), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "pytorch", - Image: 
"gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", - }, - }, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Forbidden(pytorchReplicaSpecPath.Key(string(trainingoperator.PyTorchJobReplicaTypeWorker)).Child("replicas"), ""), - }, - }, - } - - for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - gotWarnings, gotError := validatePyTorchJob(tc.oldPytorchJob, tc.pytorchJob) - if diff := cmp.Diff(tc.wantWarnings, gotWarnings, cmpopts.SortSlices(func(a, b string) bool { return a < b })); len(diff) != 0 { - t.Errorf("Unexpected warnings (-want,+got):\n%s", diff) - } - if diff := cmp.Diff(tc.wantErr, gotError, cmpopts.IgnoreFields(field.Error{}, "Detail", "BadValue")); len(diff) != 0 { - t.Errorf("Unexpected errors (-want,+got):\n%s", diff) - } - }) - } -} diff --git a/pkg/webhooks/tensorflow/tfjob_webhook.go b/pkg/webhooks/tensorflow/tfjob_webhook.go deleted file mode 100644 index 95f187f44f..0000000000 --- a/pkg/webhooks/tensorflow/tfjob_webhook.go +++ /dev/null @@ -1,127 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package tensorflow - -import ( - "context" - "fmt" - "strings" - - apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/klog/v2" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/common/util" -) - -var ( - specPath = field.NewPath("spec") - tfReplicaSpecPath = specPath.Child("tfReplicaSpecs") -) - -type Webhook struct{} - -func SetupWebhook(mgr ctrl.Manager) error { - return ctrl.NewWebhookManagedBy(mgr). - For(&trainingoperator.TFJob{}). - WithValidator(&Webhook{}). - Complete() -} - -// +kubebuilder:webhook:path=/validate-kubeflow-org-v1-tfjob,mutating=false,failurePolicy=fail,sideEffects=None,groups=kubeflow.org,resources=tfjobs,verbs=create;update,versions=v1,name=validator.tfjob.training-operator.kubeflow.org,admissionReviewVersions=v1 - -var _ webhook.CustomValidator = &Webhook{} - -func (w *Webhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { - job := obj.(*trainingoperator.TFJob) - log := ctrl.LoggerFrom(ctx).WithName("tfjob-webhook") - log.V(5).Info("Validating create", "TFJob", klog.KObj(job)) - return nil, validateTFJob(nil, job).ToAggregate() -} - -func (w *Webhook) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { - oldJob := oldObj.(*trainingoperator.TFJob) - newJob := newObj.(*trainingoperator.TFJob) - log := ctrl.LoggerFrom(ctx).WithName("tfjob-webhook") - log.V(5).Info("Validating update", "NewTFJob", klog.KObj(newJob)) - return nil, validateTFJob(oldJob, newJob).ToAggregate() -} - -func (w *Webhook) ValidateDelete(context.Context, runtime.Object) (admission.Warnings, error) { - return nil, nil -} - -func validateTFJob(oldJob, 
newJob *trainingoperator.TFJob) field.ErrorList { - var allErrs field.ErrorList - if errors := apimachineryvalidation.NameIsDNS1035Label(newJob.Name, false); len(errors) != 0 { - allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), newJob.Name, fmt.Sprintf("should match: %v", strings.Join(errors, ",")))) - } - if oldJob != nil { - allErrs = append(allErrs, util.ValidateRunPolicyUpdate(&oldJob.Spec.RunPolicy, &newJob.Spec.RunPolicy)...) - } - allErrs = append(allErrs, util.ValidateRunPolicy(&newJob.Spec.RunPolicy)...) - allErrs = append(allErrs, validateSpec(newJob.Spec)...) - return allErrs -} - -func validateSpec(spec trainingoperator.TFJobSpec) field.ErrorList { - return validateTFReplicaSpecs(spec.TFReplicaSpecs) -} - -func validateTFReplicaSpecs(rSpecs map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec) field.ErrorList { - var allErrs field.ErrorList - - if rSpecs == nil { - allErrs = append(allErrs, field.Required(tfReplicaSpecPath, "must be required")) - } - - chiefOrMaster := 0 - for rType, rSpec := range rSpecs { - rolePath := tfReplicaSpecPath.Key(string(rType)) - containerPath := rolePath.Child("template").Child("spec").Child("containers") - - if rSpec == nil || len(rSpec.Template.Spec.Containers) == 0 { - allErrs = append(allErrs, field.Required(containerPath, "must be specified")) - } - if trainingoperator.IsChiefOrMaster(rType) { - chiefOrMaster++ - } - // Make sure the image is defined in the container. - defaultContainerPresent := false - for idx, container := range rSpec.Template.Spec.Containers { - if container.Image == "" { - allErrs = append(allErrs, field.Required(containerPath.Index(idx).Child("image"), "must be required")) - } - if container.Name == trainingoperator.TFJobDefaultContainerName { - defaultContainerPresent = true - } - } - // Make sure there has at least one container named "tensorflow". 
- if !defaultContainerPresent { - allErrs = append(allErrs, field.Required(containerPath, fmt.Sprintf("must have at least one container with name %s", trainingoperator.TFJobDefaultContainerName))) - } - } - if chiefOrMaster > 1 { - allErrs = append(allErrs, field.Forbidden(tfReplicaSpecPath, "must not have more than 1 Chief or Master role")) - } - return allErrs -} diff --git a/pkg/webhooks/tensorflow/tfjob_webhook_test.go b/pkg/webhooks/tensorflow/tfjob_webhook_test.go deleted file mode 100644 index 759cc1b58b..0000000000 --- a/pkg/webhooks/tensorflow/tfjob_webhook_test.go +++ /dev/null @@ -1,214 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package tensorflow - -import ( - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/utils/ptr" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestValidateTFJob(t *testing.T) { - validTFReplicaSpecs := map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.TFJobReplicaTypeWorker: { - Replicas: ptr.To[int32](2), - RestartPolicy: trainingoperator.RestartPolicyOnFailure, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "tensorflow", - Image: "kubeflow/tf-mnist-with-summaries:latest", - Command: []string{ - "python", - "/var/tf_mnist/mnist_with_summaries.py", - }, - }}, - }, - }, - }, - } - - testCases := map[string]struct { - tfJob *trainingoperator.TFJob - wantErr field.ErrorList - }{ - "valid tfJob": { - tfJob: &trainingoperator.TFJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.TFJobSpec{ - RunPolicy: trainingoperator.RunPolicy{ - ManagedBy: ptr.To(trainingoperator.KubeflowJobsController), - }, - TFReplicaSpecs: validTFReplicaSpecs, - }, - }, - }, - "TFJob name does not meet DNS1035": { - tfJob: &trainingoperator.TFJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "00test", - }, - Spec: trainingoperator.TFJobSpec{ - TFReplicaSpecs: validTFReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("metadata").Child("name"), "", ""), - }, - }, - "no containers": { - tfJob: &trainingoperator.TFJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.TFJobSpec{ - TFReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.TFJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - 
Containers: []corev1.Container{}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(tfReplicaSpecPath.Key(string(trainingoperator.TFJobReplicaTypeWorker)).Child("template").Child("spec").Child("containers"), ""), - field.Required(tfReplicaSpecPath.Key(string(trainingoperator.TFJobReplicaTypeWorker)).Child("template").Child("spec").Child("containers"), ""), - }, - }, - "empty image": { - tfJob: &trainingoperator.TFJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.TFJobSpec{ - TFReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.TFJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "tensorflow", - Image: "", - }}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(tfReplicaSpecPath.Key(string(trainingoperator.TFJobReplicaTypeWorker)).Child("template").Child("spec").Child("containers").Index(0).Child("image"), ""), - }, - }, - "tfJob default container name doesn't present": { - tfJob: &trainingoperator.TFJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.TFJobSpec{ - TFReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.TFJobReplicaTypeWorker: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "", - Image: "kubeflow/tf-dist-mnist-test:1.0", - }}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(tfReplicaSpecPath.Key(string(trainingoperator.TFJobReplicaTypeWorker)).Child("template").Child("spec").Child("containers"), ""), - }, - }, - "there are more than 2 masterReplica's or ChiefReplica's": { - tfJob: &trainingoperator.TFJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.TFJobSpec{ - TFReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - 
trainingoperator.TFJobReplicaTypeChief: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "tensorflow", - Image: "kubeflow/tf-dist-mnist-test:1.0", - }}, - }, - }, - }, - trainingoperator.TFJobReplicaTypeMaster: { - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "tensorflow", - Image: "kubeflow/tf-dist-mnist-test:1.0", - }}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Forbidden(tfReplicaSpecPath, ""), - }, - }, - "attempt to set unsupported managedBy controller name gets rejected": { - tfJob: &trainingoperator.TFJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.TFJobSpec{ - RunPolicy: trainingoperator.RunPolicy{ - ManagedBy: ptr.To("other-job-controller"), - }, - TFReplicaSpecs: validTFReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.NotSupported(field.NewPath("spec", "runPolicy", "managedBy"), "", sets.List(sets.New( - trainingoperator.MultiKueueController, - trainingoperator.KubeflowJobsController))), - }, - }, - } - for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - got := validateTFJob(nil, tc.tfJob) - if diff := cmp.Diff(tc.wantErr, got, cmpopts.IgnoreFields(field.Error{}, "Detail", "BadValue")); len(diff) != 0 { - t.Errorf("Unexpected error (-want,+got):\n%s", diff) - } - }) - } -} diff --git a/pkg/webhooks/webhooks.go b/pkg/webhooks/webhooks.go deleted file mode 100644 index d1dd2b2f8e..0000000000 --- a/pkg/webhooks/webhooks.go +++ /dev/null @@ -1,45 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package webhooks - -import ( - "sigs.k8s.io/controller-runtime/pkg/manager" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/webhooks/jax" - "github.com/kubeflow/training-operator/pkg/webhooks/paddlepaddle" - "github.com/kubeflow/training-operator/pkg/webhooks/pytorch" - "github.com/kubeflow/training-operator/pkg/webhooks/tensorflow" - "github.com/kubeflow/training-operator/pkg/webhooks/xgboost" -) - -type WebhookSetupFunc func(manager manager.Manager) error - -var ( - SupportedSchemeWebhook = map[string]WebhookSetupFunc{ - trainingoperator.PyTorchJobKind: pytorch.SetupWebhook, - trainingoperator.TFJobKind: tensorflow.SetupWebhook, - trainingoperator.XGBoostJobKind: xgboost.SetupWebhook, - trainingoperator.MPIJobKind: scaffold, - trainingoperator.PaddleJobKind: paddlepaddle.SetupWebhook, - trainingoperator.JAXJobKind: jax.SetupWebhook, - } -) - -func scaffold(manager.Manager) error { - return nil -} diff --git a/pkg/webhooks/xgboost/xgboostjob_webhook.go b/pkg/webhooks/xgboost/xgboostjob_webhook.go deleted file mode 100644 index 5372317487..0000000000 --- a/pkg/webhooks/xgboost/xgboostjob_webhook.go +++ /dev/null @@ -1,140 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package xgboost - -import ( - "context" - "fmt" - "slices" - "strings" - - apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/klog/v2" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - "github.com/kubeflow/training-operator/pkg/common/util" -) - -var ( - specPath = field.NewPath("spec") - xgbReplicaSpecPath = specPath.Child("xgbReplicaSpecs") -) - -type Webhook struct{} - -func SetupWebhook(mgr ctrl.Manager) error { - return ctrl.NewWebhookManagedBy(mgr). - For(&trainingoperator.XGBoostJob{}). - WithValidator(&Webhook{}). 
- Complete() -} - -// +kubebuilder:webhook:path=/validate-kubeflow-org-v1-xgboostjob,mutating=false,failurePolicy=fail,sideEffects=None,groups=kubeflow.org,resources=xgboostjobs,verbs=create;update,versions=v1,name=validator.xgboostjob.training-operator.kubeflow.org,admissionReviewVersions=v1 - -var _ webhook.CustomValidator = &Webhook{} - -func (w *Webhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { - job := obj.(*trainingoperator.XGBoostJob) - log := ctrl.LoggerFrom(ctx).WithName("xgboostjob-webhook") - log.V(5).Info("Validating create", "xgboostJob", klog.KObj(job)) - return nil, validateXGBoostJob(nil, job).ToAggregate() -} - -func (w *Webhook) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { - oldJob := oldObj.(*trainingoperator.XGBoostJob) - newJob := newObj.(*trainingoperator.XGBoostJob) - log := ctrl.LoggerFrom(ctx).WithName("xgboostjob-webhook") - log.V(5).Info("Validating create", "xgboostJob", klog.KObj(newJob)) - return nil, validateXGBoostJob(oldJob, newJob).ToAggregate() -} - -func (w *Webhook) ValidateDelete(context.Context, runtime.Object) (admission.Warnings, error) { - return nil, nil -} - -func validateXGBoostJob(oldJob, newJob *trainingoperator.XGBoostJob) field.ErrorList { - var allErrs field.ErrorList - if errors := apimachineryvalidation.NameIsDNS1035Label(newJob.Name, false); len(errors) != 0 { - allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), newJob.Name, fmt.Sprintf("should match: %v", strings.Join(errors, ",")))) - } - if oldJob != nil { - allErrs = append(allErrs, util.ValidateRunPolicyUpdate(&oldJob.Spec.RunPolicy, &newJob.Spec.RunPolicy)...) - } - allErrs = append(allErrs, util.ValidateRunPolicy(&newJob.Spec.RunPolicy)...) - allErrs = append(allErrs, validateSpec(newJob.Spec)...) 
- return allErrs -} - -func validateSpec(spec trainingoperator.XGBoostJobSpec) field.ErrorList { - return validateXGBReplicaSpecs(spec.XGBReplicaSpecs) -} - -func validateXGBReplicaSpecs(rSpecs map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec) field.ErrorList { - var allErrs field.ErrorList - - if rSpecs == nil { - allErrs = append(allErrs, field.Required(xgbReplicaSpecPath, "must be required")) - } - masterExists := false - for rType, rSpec := range rSpecs { - rolePath := xgbReplicaSpecPath.Key(string(rType)) - containersPath := rolePath.Child("template").Child("spec").Child("containers") - - // Make sure the replica type is valid. - validReplicaTypes := []trainingoperator.ReplicaType{ - trainingoperator.XGBoostJobReplicaTypeMaster, - trainingoperator.XGBoostJobReplicaTypeWorker, - } - if !slices.Contains(validReplicaTypes, rType) { - allErrs = append(allErrs, field.NotSupported(rolePath, rType, validReplicaTypes)) - } - - if rSpec == nil || len(rSpec.Template.Spec.Containers) == 0 { - allErrs = append(allErrs, field.Required(containersPath, "must be specified")) - } - - // Make sure the image is defined in the container - defaultContainerPresent := false - for idx, container := range rSpec.Template.Spec.Containers { - if container.Image == "" { - allErrs = append(allErrs, field.Required(containersPath.Index(idx).Child("image"), "must be required")) - } - if container.Name == trainingoperator.XGBoostJobDefaultContainerName { - defaultContainerPresent = true - } - } - // Make sure there has at least one container named "xgboost" - if !defaultContainerPresent { - allErrs = append(allErrs, field.Required(containersPath, fmt.Sprintf("must have at least one container with name %s", trainingoperator.XGBoostJobDefaultContainerName))) - } - if rType == trainingoperator.XGBoostJobReplicaTypeMaster { - masterExists = true - if rSpec.Replicas == nil || int(*rSpec.Replicas) != 1 { - allErrs = append(allErrs, field.Forbidden(rolePath.Child("replicas"), "must be 
1")) - } - } - } - if !masterExists { - allErrs = append(allErrs, field.Required(xgbReplicaSpecPath.Key(string(trainingoperator.XGBoostJobReplicaTypeMaster)), "must be present")) - } - return allErrs -} diff --git a/pkg/webhooks/xgboost/xgboostjob_webhook_test.go b/pkg/webhooks/xgboost/xgboostjob_webhook_test.go deleted file mode 100644 index 3c1d410598..0000000000 --- a/pkg/webhooks/xgboost/xgboostjob_webhook_test.go +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright 2024 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package xgboost - -import ( - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/apimachinery/pkg/util/validation/field" - "k8s.io/utils/ptr" - - trainingoperator "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -func TestValidateXGBoostJob(t *testing.T) { - validXGBoostReplicaSpecs := map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.XGBoostJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - RestartPolicy: trainingoperator.RestartPolicyNever, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "xgboost", - Image: "docker.io/kubeflow/xgboost-dist-iris:latest", - Ports: []corev1.ContainerPort{{ - Name: "xgboostjob-port", - ContainerPort: 9991, - }}, - ImagePullPolicy: corev1.PullAlways, - Args: []string{ - "--job_type=Train", - "--xgboost_parameter=objective:multi:softprob,num_class:3", - "--n_estimators=10", - "--learning_rate=0.1", - "--model_path=/tmp/xgboost-model", - "--model_storage_type=local", - }, - }}, - }, - }, - }, - trainingoperator.XGBoostJobReplicaTypeWorker: { - Replicas: ptr.To[int32](2), - RestartPolicy: trainingoperator.RestartPolicyExitCode, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "xgboost", - Image: "docker.io/kubeflow/xgboost-dist-iris:latest", - Ports: []corev1.ContainerPort{{ - Name: "xgboostjob-port", - ContainerPort: 9991, - }}, - ImagePullPolicy: corev1.PullAlways, - Args: []string{ - "--job_type=Train", - "--xgboost_parameter=objective:multi:softprob,num_class:3", - "--n_estimators=10", - "--learning_rate=0.1", - }, - }}, - }, - }, - }, - } - - testCases := map[string]struct { - xgboostJob *trainingoperator.XGBoostJob - wantErr field.ErrorList - }{ - "valid XGBoostJob": { - xgboostJob: 
&trainingoperator.XGBoostJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.XGBoostJobSpec{ - RunPolicy: trainingoperator.RunPolicy{ - ManagedBy: ptr.To(trainingoperator.KubeflowJobsController), - }, - XGBReplicaSpecs: validXGBoostReplicaSpecs, - }, - }, - }, - "XGBoostJob name does not meet DNS1035": { - xgboostJob: &trainingoperator.XGBoostJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "-test", - }, - Spec: trainingoperator.XGBoostJobSpec{ - XGBReplicaSpecs: validXGBoostReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.Invalid(field.NewPath("metadata").Child("name"), "", ""), - }, - }, - "empty containers": { - xgboostJob: &trainingoperator.XGBoostJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.XGBoostJobSpec{ - XGBReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.XGBoostJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(xgbReplicaSpecPath.Key(string(trainingoperator.XGBoostJobReplicaTypeMaster)).Child("template").Child("spec").Child("containers"), ""), - field.Required(xgbReplicaSpecPath.Key(string(trainingoperator.XGBoostJobReplicaTypeMaster)).Child("template").Child("spec").Child("containers"), ""), - }, - }, - "image is empty": { - xgboostJob: &trainingoperator.XGBoostJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.XGBoostJobSpec{ - XGBReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.XGBoostJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "xgboost", - Image: "", - }}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - 
field.Required(xgbReplicaSpecPath.Key(string(trainingoperator.XGBoostJobReplicaTypeMaster)).Child("template").Child("spec").Child("containers").Index(0).Child("image"), ""), - }, - }, - "xgboostJob default container name doesn't present": { - xgboostJob: &trainingoperator.XGBoostJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.XGBoostJobSpec{ - XGBReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.XGBoostJobReplicaTypeMaster: { - Replicas: ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "", - Image: "gcr.io/kubeflow-ci/xgboost-dist-mnist_test:1.0", - }}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(xgbReplicaSpecPath.Key(string(trainingoperator.XGBoostJobReplicaTypeMaster)).Child("template").Child("spec").Child("containers"), ""), - }, - }, - "the number of replicas in masterReplica is other than 1": { - xgboostJob: &trainingoperator.XGBoostJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.XGBoostJobSpec{ - XGBReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.XGBoostJobReplicaTypeMaster: { - Replicas: ptr.To[int32](2), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "xgboost", - Image: "gcr.io/kubeflow-ci/xgboost-dist-mnist_test:1.0", - }}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Forbidden(xgbReplicaSpecPath.Key(string(trainingoperator.XGBoostJobReplicaTypeMaster)).Child("replicas"), ""), - }, - }, - "masterReplica does not exist": { - xgboostJob: &trainingoperator.XGBoostJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.XGBoostJobSpec{ - XGBReplicaSpecs: map[trainingoperator.ReplicaType]*trainingoperator.ReplicaSpec{ - trainingoperator.XGBoostJobReplicaTypeWorker: { - Replicas: 
ptr.To[int32](1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "xgboost", - Image: "gcr.io/kubeflow-ci/xgboost-dist-mnist_test:1.0", - }}, - }, - }, - }, - }, - }, - }, - wantErr: field.ErrorList{ - field.Required(xgbReplicaSpecPath.Key(string(trainingoperator.XGBoostJobReplicaTypeMaster)), ""), - }, - }, - "attempt to set unsupported managedBy controller name gets rejected": { - xgboostJob: &trainingoperator.XGBoostJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - }, - Spec: trainingoperator.XGBoostJobSpec{ - RunPolicy: trainingoperator.RunPolicy{ - ManagedBy: ptr.To("other-job-controller"), - }, - XGBReplicaSpecs: validXGBoostReplicaSpecs, - }, - }, - wantErr: field.ErrorList{ - field.NotSupported(field.NewPath("spec", "runPolicy", "managedBy"), "", sets.List(sets.New( - trainingoperator.MultiKueueController, - trainingoperator.KubeflowJobsController))), - }, - }, - } - for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - got := validateXGBoostJob(nil, tc.xgboostJob) - if diff := cmp.Diff(tc.wantErr, got, cmpopts.IgnoreFields(field.Error{}, "Detail", "BadValue")); len(diff) != 0 { - t.Errorf("Unexpected errors (-want,+got):\n%s", diff) - } - }) - } -} diff --git a/prow_config.yaml b/prow_config.yaml deleted file mode 100644 index 903013cdbd..0000000000 --- a/prow_config.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# This file configures the workflows to trigger in our Prow jobs. -# see https://github.com/kubeflow/testing/blob/master/py/kubeflow/testing/run_e2e_workflow.py -# -# The testWorkerImage should be the same for all workflows. 
-workflows: [] -# - app_dir: kubeflow/training-operator/test/workflows -# component: workflows -# name: v1 -# job_types: -# - presubmit -# include_dirs: -# - build/* -# - cmd/* -# - examples/* -# - hack/* -# - manifests/* -# - pkg/* -# - py/* -# - scripts/* -# - sdk/* -# - test/* -# - prow_config.yaml -# - go.* -# params: -# registry: "809251082950.dkr.ecr.us-west-2.amazonaws.com/training-operator" -# tfJobVersion: v1 -# testWorkerImage: public.ecr.aws/j1r0q0g6/kubeflow-testing:latest -# - app_dir: kubeflow/training-operator/test/workflows -# component: workflows -# name: v1 -# job_types: -# - postsubmit -# include_dirs: -# - build/* -# - cmd/* -# - examples/* -# - hack/* -# - manifests/* -# - pkg/* -# - py/* -# - scripts/* -# - sdk/* -# - test/* -# - prow_config.yaml -# params: -# registry: "public.ecr.aws/j1r0q0g6/training/training-operator" -# tfJobVersion: v1 -# testWorkerImage: public.ecr.aws/j1r0q0g6/kubeflow-testing:latest diff --git a/scripts/copy-to-gopath.sh b/scripts/copy-to-gopath.sh deleted file mode 100755 index 80bcf5494d..0000000000 --- a/scripts/copy-to-gopath.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -# Copyright 2018 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# This shell script is used to build an image from our argo workflow - -set -o errexit -set -o nounset -set -o pipefail - -export PATH=${GOPATH}/bin:/usr/local/go/bin:${PATH} -GO_DIR=${GOPATH}/src/github.com/kubeflow/${REPO_NAME} - -# e2e test will run in go_dir. this is a required step. -echo "Create symlink to GOPATH" -# TODO(@Jeffwan): it should be ${REPO_OWNER}. Change it back later. -mkdir -p ${GOPATH}/src/github.com/kubeflow -ln -s ${PWD} ${GO_DIR} -cd ${GO_DIR} diff --git a/scripts/gha/build-image.sh b/scripts/gha/build-image.sh deleted file mode 100755 index cb4f0fc832..0000000000 --- a/scripts/gha/build-image.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# Copyright 2022 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# The script is used to build Kubeflow Training image. - - -set -o errexit -set -o nounset -set -o pipefail - -docker build . -t ${TRAINING_CI_IMAGE} -f build/images/training-operator/Dockerfile diff --git a/scripts/gha/build-jax-mnist-image.sh b/scripts/gha/build-jax-mnist-image.sh deleted file mode 100755 index b9a30fa18f..0000000000 --- a/scripts/gha/build-jax-mnist-image.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -# Copyright 2025 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# The script is used to build images needed to run JAX Job E2E test. - - -set -o errexit -set -o nounset -set -o pipefail - -# Build Image for MNIST example with SPMD for JAX -docker build examples/jax/jax-dist-spmd-mnist -t ${JAX_JOB_CI_IMAGE} -f examples/jax/jax-dist-spmd-mnist/Dockerfile diff --git a/scripts/gha/build-storage-initializer.sh b/scripts/gha/build-storage-initializer.sh deleted file mode 100755 index 261e140a60..0000000000 --- a/scripts/gha/build-storage-initializer.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# Copyright 2024 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# The script is used to build Kubeflow Training image. 
- - -set -o errexit -set -o nounset -set -o pipefail - -docker build sdk/python/kubeflow/storage_initializer -t ${STORAGE_INITIALIZER_CI_IMAGE} -f sdk/python/kubeflow/storage_initializer/Dockerfile diff --git a/scripts/gha/build-trainer.sh b/scripts/gha/build-trainer.sh deleted file mode 100755 index bf7f4d3b94..0000000000 --- a/scripts/gha/build-trainer.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# Copyright 2024 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# The script is used to build Kubeflow Training image. - - -set -o errexit -set -o nounset -set -o pipefail - -docker build sdk/python/kubeflow/trainer -t ${TRAINER_CI_IMAGE} -f sdk/python/kubeflow/trainer/Dockerfile.cpu diff --git a/scripts/setup-tf-operator.sh b/scripts/setup-tf-operator.sh deleted file mode 100755 index 8b07d59c92..0000000000 --- a/scripts/setup-tf-operator.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# Copyright 2018 The Kubernetes Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# This shell script is used to build a cluster and create a namespace from our -# argo workflow - -set -o errexit -set -o nounset -set -o pipefail - -CLUSTER_NAME="${CLUSTER_NAME}" -REGION="${AWS_REGION:-us-west-2}" -REGISTRY="${ECR_REGISTRY:-public.ecr.aws/j1r0q0g6/training/training-operator}" -VERSION="${PULL_BASE_SHA}" -GO_DIR=${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME} - -echo "Configuring kubeconfig.." -aws eks update-kubeconfig --region=${REGION} --name=${CLUSTER_NAME} - -echo "Update Training Operator manifest with new name $REGISTRY and tag $VERSION" -cd manifests/overlays/standalone -kustomize edit set image public.ecr.aws/j1r0q0g6/training/training-operator=${REGISTRY}:${VERSION} - -echo "Installing Training Operator manifests" -kustomize build . | kubectl apply -f - - -TIMEOUT=30 -until kubectl get pods -n kubeflow | grep tf-job-operator | grep 1/1 || [[ $TIMEOUT -eq 1 ]]; do - sleep 10 - TIMEOUT=$((TIMEOUT - 1)) -done -kubectl describe all -n kubeflow -kubectl describe pods -n kubeflow diff --git a/scripts/setup-training-operator.sh b/scripts/setup-training-operator.sh deleted file mode 100755 index a1cda80e68..0000000000 --- a/scripts/setup-training-operator.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -# Copyright 2021 The Kubernetes Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# This shell script is used to build a cluster and create a namespace from our -# argo workflow - - -set -o errexit -set -o nounset -set -o pipefail - -CLUSTER_NAME="${CLUSTER_NAME}" -REGION="${AWS_REGION:-us-west-2}" -REGISTRY="${ECR_REGISTRY:-public.ecr.aws/j1r0q0g6/training/training-operator}" -VERSION="${PULL_BASE_SHA}" -GO_DIR=${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME} - -echo "Configuring kubeconfig.." -aws eks update-kubeconfig --region=${REGION} --name=${CLUSTER_NAME} - -echo "Update training operator manifest with new name $REGISTRY and tag $VERSION" -cd manifests/overlays/standalone -#kustomize edit set image public.ecr.aws/j1r0q0g6/training/training-operator=${REGISTRY}:${VERSION} -kustomize edit set image kubeflow/training-operator=${REGISTRY}:${VERSION} - -echo "Installing training operator manifests" -kustomize build . | kubectl apply --server-side -f - - -TIMEOUT=30 -until kubectl get pods -n kubeflow | grep training-operator | grep 1/1 || [[ $TIMEOUT -eq 1 ]]; do - sleep 10 - TIMEOUT=$(( TIMEOUT - 1 )) -done -kubectl describe all -n kubeflow -kubectl describe pods -n kubeflow diff --git a/sdk/python/.gitignore b/sdk/python/.gitignore deleted file mode 100644 index 132d1c359c..0000000000 --- a/sdk/python/.gitignore +++ /dev/null @@ -1,72 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*,cover -.hypothesis/ -venv/ -.python-version - -# Translations -*.mo -*.pot - -# Django stuff: -*.log - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -#Ipython Notebook -.ipynb_checkpoints - -# Add more useless files -tox.ini -test-requirements.txt -git_push.sh -.travis.yml -.swagger-codegen -.swagger-codegen-ignore diff --git a/sdk/python/.openapi-generator-ignore b/sdk/python/.openapi-generator-ignore deleted file mode 100644 index c0ae241a85..0000000000 --- a/sdk/python/.openapi-generator-ignore +++ /dev/null @@ -1,33 +0,0 @@ -# OpenAPI Generator Ignore -# Generated by openapi-generator https://github.com/openapitools/openapi-generator - -# Use this file to prevent files from being overwritten by the generator. -# The patterns follow closely to .gitignore or .dockerignore. - -# As an example, the C# client generator defines ApiClient.cs. -# You can make changes and tell OpenAPI Generator to ignore just this file by uncommenting the following line: -#ApiClient.cs - -# You can match any string of characters against a directory, file or extension with a single asterisk (*): -#foo/*/qux -# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux - -# You can recursively match patterns against a directory, file or extension with a double asterisk (**): -#foo/**/qux -# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux - -# You can also negate patterns with an exclamation (!). 
-# For example, you can ignore all files in a docs folder with the file extension .md: -#docs/*.md -# Then explicitly reverse the ignore rule for a single file: -#!docs/README.md - -.gitignore -OWNERS -README.md -requirements.txt -setup.py -.gitlab-ci.yml -setup.cfg -git_push.sh -.travis.yml diff --git a/sdk/python/.openapi-generator/VERSION b/sdk/python/.openapi-generator/VERSION deleted file mode 100644 index ecedc98d1d..0000000000 --- a/sdk/python/.openapi-generator/VERSION +++ /dev/null @@ -1 +0,0 @@ -4.3.1 \ No newline at end of file diff --git a/sdk/python/Dockerfile.conformance b/sdk/python/Dockerfile.conformance deleted file mode 100644 index 2af8a23fbe..0000000000 --- a/sdk/python/Dockerfile.conformance +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2023 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Dockerfile for building the source code of conformance tests -FROM python:3.10-alpine - -WORKDIR /kubeflow/training-operator/sdk/python - -COPY sdk/python . - -COPY sdk/python/conformance/run.sh . - -# Add test script. -RUN chmod +x run.sh - -RUN pip install pytest -RUN python -m pip install -e . 
- -ENTRYPOINT [ "./run.sh" ] diff --git a/sdk/python/OWNERS b/sdk/python/OWNERS deleted file mode 100644 index c564ae86b1..0000000000 --- a/sdk/python/OWNERS +++ /dev/null @@ -1,2 +0,0 @@ -approvers: - - jinchihe diff --git a/sdk/python/README.md b/sdk/python/README.md deleted file mode 100644 index c3ec6684b2..0000000000 --- a/sdk/python/README.md +++ /dev/null @@ -1,95 +0,0 @@ -# Kubeflow Training SDK - -Python SDK for Training Operator - -## Requirements. - -Python >= 3.8 - -Training Python SDK follows [Python release cycle](https://devguide.python.org/versions/#python-release-cycle) -for supported Python versions. - -## Installation & Usage - -### pip install - -```sh -pip install kubeflow-training -``` - -Then import the package: - -```python -from kubeflow import training -``` - -### Setuptools - -Install via [Setuptools](http://pypi.python.org/pypi/setuptools). - -```sh -python setup.py install --user -``` - -(or `sudo python setup.py install` to install the package for all users) - -## Getting Started - -Please follow the [Getting Started guide](https://www.kubeflow.org/docs/components/training/overview/#getting-started) -or check Training Operator [examples](../../examples). - -## Documentation for API Endpoints - -TODO(andreyvelich): These docs are outdated. 
Please track this issue for the status: -https://github.com/kubeflow/katib/issues/2081 - -Class | Method | Description ------------- | ------------- | ------------- -[TFJobClient](docs/TFJobClient.md) | [create](docs/TFJobClient.md#create) | Create TFJob| -[TFJobClient](docs/TFJobClient.md) | [get](docs/TFJobClient.md#get) | Get or watch the specified TFJob or all TFJob in the namespace | -[TFJobClient](docs/TFJobClient.md) | [patch](docs/TFJobClient.md#patch) | Patch the specified TFJob| -[TFJobClient](docs/TFJobClient.md) | [delete](docs/TFJobClient.md#delete) | Delete the specified TFJob | -[TFJobClient](docs/TFJobClient.md) | [wait_for_job](docs/TFJobClient.md#wait_for_job) | Wait for the specified job to finish | -[TFJobClient](docs/TFJobClient.md) | [wait_for_condition](docs/TFJobClient.md#wait_for_condition) | Waits until any of the specified conditions occur | -[TFJobClient](docs/TFJobClient.md) | [get_job_status](docs/TFJobClient.md#get_job_status) | Get the TFJob status| -[TFJobClient](docs/TFJobClient.md) | [is_job_running](docs/TFJobClient.md#is_job_running) | Check if the TFJob status is Running | -[TFJobClient](docs/TFJobClient.md) | [is_job_succeeded](docs/TFJobClient.md#is_job_succeeded) | Check if the TFJob status is Succeeded | -[TFJobClient](docs/TFJobClient.md) | [get_pod_names](docs/TFJobClient.md#get_pod_names) | Get pod names of TFJob | -[TFJobClient](docs/TFJobClient.md) | [get_logs](docs/TFJobClient.md#get_logs) | Get training logs of the TFJob | -[PyTorchJobClient](docs/PyTorchJobClient.md) | [create](docs/PyTorchJobClient.md#create) | Create PyTorchJob| -[PyTorchJobClient](docs/PyTorchJobClient.md) | [get](docs/PyTorchJobClient.md#get) | Get the specified PyTorchJob or all PyTorchJob in the namespace | -[PyTorchJobClient](docs/PyTorchJobClient.md) | [patch](docs/PyTorchJobClient.md#patch) | Patch the specified PyTorchJob| -[PyTorchJobClient](docs/PyTorchJobClient.md) | [delete](docs/PyTorchJobClient.md#delete) | Delete the specified 
PyTorchJob | -[PyTorchJobClient](docs/PyTorchJobClient.md) | [wait_for_job](docs/PyTorchJobClient.md#wait_for_job) | Wait for the specified job to finish | -[PyTorchJobClient](docs/PyTorchJobClient.md) | [wait_for_condition](docs/PyTorchJobClient.md#wait_for_condition) | Waits until any of the specified conditions occur | -[PyTorchJobClient](docs/PyTorchJobClient.md) | [get_job_status](docs/PyTorchJobClient.md#get_job_status) | Get the PyTorchJob status| -[PyTorchJobClient](docs/PyTorchJobClient.md) | [is_job_running](docs/PyTorchJobClient.md#is_job_running) | Check if the PyTorchJob running | -[PyTorchJobClient](docs/PyTorchJobClient.md) | [is_job_succeeded](docs/PyTorchJobClient.md#is_job_succeeded) | Check if the PyTorchJob Succeeded | -[PyTorchJobClient](docs/PyTorchJobClient.md) | [get_pod_names](docs/PyTorchJobClient.md#get_pod_names) | Get pod names of PyTorchJob | -[PyTorchJobClient](docs/PyTorchJobClient.md)| [get_logs](docs/PyTorchJobClient.md#get_logs) | Get training logs of the PyTorchJob | - -## Documentation For Models - -- [V1JobCondition](docs/V1JobCondition.md) -- [V1JobStatus](docs/V1JobStatus.md) -- [V1PyTorchJob](docs/KubeflowOrgV1PyTorchJob.md) -- [V1PyTorchJobList](docs/KubeflowOrgV1PyTorchJobList.md) -- [V1PyTorchJobSpec](docs/KubeflowOrgV1PyTorchJobSpec.md) -- [V1ReplicaSpec](docs/V1ReplicaSpec.md) -- [V1ReplicaStatus](docs/V1ReplicaStatus.md) -- [V1RunPolicy](docs/V1RunPolicy.md) -- [V1SchedulingPolicy](docs/V1SchedulingPolicy.md) -- [V1TFJob](docs/KubeflowOrgV1TFJob.md) -- [V1TFJobList](docs/KubeflowOrgV1TFJobList.md) -- [V1TFJobSpec](docs/KubeflowOrgV1TFJobSpec.md) -- [V1XGBoostJob](docs/KubeflowOrgV1XGBoostJob.md) -- [V1XGBoostJobList](docs/KubeflowOrgV1XGBoostJobList.md) -- [V1XGBoostJobSpec](docs/KubeflowOrgV1XGBoostJobSpec.md) - -## Building conformance tests - -Run - -``` -docker build . 
-f Dockerfile.conformance -t -``` diff --git a/sdk/python/conformance/run.sh b/sdk/python/conformance/run.sh deleted file mode 100644 index a73d998a9b..0000000000 --- a/sdk/python/conformance/run.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -# Run conformance test and generate test report. -pytest test/e2e/ --namespace=kf-conformance -v | tee /tmp/training-operator-conformance.log - -# Create the done file. -touch /tmp/training-operator-conformance.done -echo "Done..." - -# Keep the container running so the test logs can be downloaded. -while true; do sleep 10000; done diff --git a/sdk/python/docs/KubeflowOrgV1ElasticPolicy.md b/sdk/python/docs/KubeflowOrgV1ElasticPolicy.md deleted file mode 100644 index c39927a013..0000000000 --- a/sdk/python/docs/KubeflowOrgV1ElasticPolicy.md +++ /dev/null @@ -1,20 +0,0 @@ -# KubeflowOrgV1ElasticPolicy - -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**max_replicas** | **int** | upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. | [optional] -**max_restarts** | **int** | | [optional] -**metrics** | [**list[K8sIoApiAutoscalingV2MetricSpec]**](K8sIoApiAutoscalingV2MetricSpec.md) | Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. See the individual metric source types for more information about how each type of metric must respond. If not set, the HPA will not be created. | [optional] -**min_replicas** | **int** | minReplicas is the lower limit for the number of replicas to which the training job can scale down. It defaults to null. 
| [optional] -**n_proc_per_node** | **int** | Number of workers per node; supported values: [auto, cpu, gpu, int]. Deprecated: This API is deprecated in v1.7+ Use .spec.nprocPerNode instead. | [optional] -**rdzv_backend** | **str** | | [optional] -**rdzv_conf** | [**list[KubeflowOrgV1RDZVConf]**](KubeflowOrgV1RDZVConf.md) | RDZVConf contains additional rendezvous configuration (<key1>=<value1>,<key2>=<value2>,...). | [optional] -**rdzv_host** | **str** | | [optional] -**rdzv_id** | **str** | | [optional] -**rdzv_port** | **int** | | [optional] -**standalone** | **bool** | Start a local standalone rendezvous backend that is represented by a C10d TCP store on port 29400. Useful when launching single-node, multi-worker job. If specified --rdzv_backend, --rdzv_endpoint, --rdzv_id are auto-assigned; any explicitly set values are ignored. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1JAXJob.md b/sdk/python/docs/KubeflowOrgV1JAXJob.md deleted file mode 100644 index b2f3833623..0000000000 --- a/sdk/python/docs/KubeflowOrgV1JAXJob.md +++ /dev/null @@ -1,15 +0,0 @@ -# KubeflowOrgV1JAXJob - -JAXJob Represents a JAXJob resource. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] -**spec** | [**KubeflowOrgV1JAXJobSpec**](KubeflowOrgV1JAXJobSpec.md) | | [optional] -**status** | [**KubeflowOrgV1JobStatus**](KubeflowOrgV1JobStatus.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1JAXJobList.md b/sdk/python/docs/KubeflowOrgV1JAXJobList.md deleted file mode 100644 index 02442b547e..0000000000 --- a/sdk/python/docs/KubeflowOrgV1JAXJobList.md +++ /dev/null @@ -1,14 +0,0 @@ -# KubeflowOrgV1JAXJobList - -JAXJobList is a list of JAXJobs. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**items** | [**list[KubeflowOrgV1JAXJob]**](KubeflowOrgV1JAXJob.md) | List of JAXJobs. | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1JAXJobSpec.md b/sdk/python/docs/KubeflowOrgV1JAXJobSpec.md deleted file mode 100644 index 29e5d97782..0000000000 --- a/sdk/python/docs/KubeflowOrgV1JAXJobSpec.md +++ /dev/null @@ -1,12 +0,0 @@ -# KubeflowOrgV1JAXJobSpec - -JAXJobSpec is a desired state description of the JAXJob. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**jax_replica_specs** | [**dict(str, KubeflowOrgV1ReplicaSpec)**](KubeflowOrgV1ReplicaSpec.md) | A map of JAXReplicaType (type) to ReplicaSpec (value). Specifies the JAX cluster configuration. For example, { \"Worker\": JAXReplicaSpec, } | -**run_policy** | [**KubeflowOrgV1RunPolicy**](KubeflowOrgV1RunPolicy.md) | | - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1JobCondition.md b/sdk/python/docs/KubeflowOrgV1JobCondition.md deleted file mode 100644 index 9dabe76763..0000000000 --- a/sdk/python/docs/KubeflowOrgV1JobCondition.md +++ /dev/null @@ -1,16 +0,0 @@ -# KubeflowOrgV1JobCondition - -JobCondition describes the state of the job at a certain point. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**last_transition_time** | [**datetime**](V1Time.md) | | [optional] -**last_update_time** | [**datetime**](V1Time.md) | | [optional] -**message** | **str** | A human readable message indicating details about the transition. 
| [optional] -**reason** | **str** | The reason for the condition's last transition. | [optional] -**status** | **str** | Status of the condition, one of True, False, Unknown. | [default to ''] -**type** | **str** | Type of job condition. | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1JobStatus.md b/sdk/python/docs/KubeflowOrgV1JobStatus.md deleted file mode 100644 index 33fe6020bb..0000000000 --- a/sdk/python/docs/KubeflowOrgV1JobStatus.md +++ /dev/null @@ -1,15 +0,0 @@ -# KubeflowOrgV1JobStatus - -JobStatus represents the current observed state of the training Job. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**completion_time** | [**datetime**](V1Time.md) | | [optional] -**conditions** | [**list[KubeflowOrgV1JobCondition]**](KubeflowOrgV1JobCondition.md) | Conditions is an array of current observed job conditions. | [optional] -**last_reconcile_time** | [**datetime**](V1Time.md) | | [optional] -**replica_statuses** | [**dict(str, KubeflowOrgV1ReplicaStatus)**](KubeflowOrgV1ReplicaStatus.md) | ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica. 
| [optional] -**start_time** | [**datetime**](V1Time.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1MPIJob.md b/sdk/python/docs/KubeflowOrgV1MPIJob.md deleted file mode 100644 index 74ce8e51ec..0000000000 --- a/sdk/python/docs/KubeflowOrgV1MPIJob.md +++ /dev/null @@ -1,14 +0,0 @@ -# KubeflowOrgV1MPIJob - -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] -**spec** | [**KubeflowOrgV1MPIJobSpec**](KubeflowOrgV1MPIJobSpec.md) | | [optional] -**status** | [**KubeflowOrgV1JobStatus**](KubeflowOrgV1JobStatus.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1MPIJobList.md b/sdk/python/docs/KubeflowOrgV1MPIJobList.md deleted file mode 100644 index 5eb2500690..0000000000 --- a/sdk/python/docs/KubeflowOrgV1MPIJobList.md +++ /dev/null @@ -1,13 +0,0 @@ -# KubeflowOrgV1MPIJobList - -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**items** | [**list[KubeflowOrgV1MPIJob]**](KubeflowOrgV1MPIJob.md) | | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1MPIJobSpec.md b/sdk/python/docs/KubeflowOrgV1MPIJobSpec.md deleted file mode 100644 index 14c8d9ea6f..0000000000 --- a/sdk/python/docs/KubeflowOrgV1MPIJobSpec.md +++ /dev/null @@ -1,14 +0,0 @@ -# KubeflowOrgV1MPIJobSpec - -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**clean_pod_policy** | **str** | CleanPodPolicy defines the policy that whether to kill pods after the job completes. Defaults to None. | [optional] -**main_container** | **str** | MainContainer specifies name of the main container which executes the MPI code. | [optional] -**mpi_replica_specs** | [**dict(str, KubeflowOrgV1ReplicaSpec)**](KubeflowOrgV1ReplicaSpec.md) | `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that specify the MPI replicas to run. | -**run_policy** | [**KubeflowOrgV1RunPolicy**](KubeflowOrgV1RunPolicy.md) | | [optional] -**slots_per_worker** | **int** | Specifies the number of slots per worker used in hostfile. Defaults to 1. 
| [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1PaddleElasticPolicy.md b/sdk/python/docs/KubeflowOrgV1PaddleElasticPolicy.md deleted file mode 100644 index 4daf9ddacf..0000000000 --- a/sdk/python/docs/KubeflowOrgV1PaddleElasticPolicy.md +++ /dev/null @@ -1,13 +0,0 @@ -# KubeflowOrgV1PaddleElasticPolicy - -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**max_replicas** | **int** | upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. | [optional] -**max_restarts** | **int** | MaxRestarts is the limit for restart times of pods in elastic mode. | [optional] -**metrics** | [**list[K8sIoApiAutoscalingV2MetricSpec]**](K8sIoApiAutoscalingV2MetricSpec.md) | Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. See the individual metric source types for more information about how each type of metric must respond. If not set, the HPA will not be created. | [optional] -**min_replicas** | **int** | minReplicas is the lower limit for the number of replicas to which the training job can scale down. It defaults to null. 
| [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1PaddleJob.md b/sdk/python/docs/KubeflowOrgV1PaddleJob.md deleted file mode 100644 index 2688ec819a..0000000000 --- a/sdk/python/docs/KubeflowOrgV1PaddleJob.md +++ /dev/null @@ -1,15 +0,0 @@ -# KubeflowOrgV1PaddleJob - -PaddleJob Represents a PaddleJob resource. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] -**spec** | [**KubeflowOrgV1PaddleJobSpec**](KubeflowOrgV1PaddleJobSpec.md) | | [optional] -**status** | [**KubeflowOrgV1JobStatus**](KubeflowOrgV1JobStatus.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1PaddleJobList.md b/sdk/python/docs/KubeflowOrgV1PaddleJobList.md deleted file mode 100644 index 2ff4e534d1..0000000000 --- a/sdk/python/docs/KubeflowOrgV1PaddleJobList.md +++ /dev/null @@ -1,14 +0,0 @@ -# KubeflowOrgV1PaddleJobList - -PaddleJobList is a list of PaddleJobs. 
-## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**items** | [**list[KubeflowOrgV1PaddleJob]**](KubeflowOrgV1PaddleJob.md) | List of PaddleJobs. | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1PaddleJobSpec.md b/sdk/python/docs/KubeflowOrgV1PaddleJobSpec.md deleted file mode 100644 index 5060d38997..0000000000 --- a/sdk/python/docs/KubeflowOrgV1PaddleJobSpec.md +++ /dev/null @@ -1,13 +0,0 @@ -# KubeflowOrgV1PaddleJobSpec - -PaddleJobSpec is a desired state description of the PaddleJob. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**elastic_policy** | [**KubeflowOrgV1PaddleElasticPolicy**](KubeflowOrgV1PaddleElasticPolicy.md) | | [optional] -**paddle_replica_specs** | [**dict(str, KubeflowOrgV1ReplicaSpec)**](KubeflowOrgV1ReplicaSpec.md) | A map of PaddleReplicaType (type) to ReplicaSpec (value). Specifies the Paddle cluster configuration. 
For example, { \"Master\": PaddleReplicaSpec, \"Worker\": PaddleReplicaSpec, } | -**run_policy** | [**KubeflowOrgV1RunPolicy**](KubeflowOrgV1RunPolicy.md) | | - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1PyTorchJob.md b/sdk/python/docs/KubeflowOrgV1PyTorchJob.md deleted file mode 100644 index b117b25d23..0000000000 --- a/sdk/python/docs/KubeflowOrgV1PyTorchJob.md +++ /dev/null @@ -1,15 +0,0 @@ -# KubeflowOrgV1PyTorchJob - -PyTorchJob Represents a PyTorchJob resource. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] -**spec** | [**KubeflowOrgV1PyTorchJobSpec**](KubeflowOrgV1PyTorchJobSpec.md) | | [optional] -**status** | [**KubeflowOrgV1JobStatus**](KubeflowOrgV1JobStatus.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1PyTorchJobList.md b/sdk/python/docs/KubeflowOrgV1PyTorchJobList.md deleted file mode 100644 index b352d4b5c7..0000000000 --- a/sdk/python/docs/KubeflowOrgV1PyTorchJobList.md +++ /dev/null @@ -1,14 +0,0 @@ -# KubeflowOrgV1PyTorchJobList - -PyTorchJobList is a list of PyTorchJobs. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**items** | [**list[KubeflowOrgV1PyTorchJob]**](KubeflowOrgV1PyTorchJob.md) | List of PyTorchJobs. | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1PyTorchJobSpec.md b/sdk/python/docs/KubeflowOrgV1PyTorchJobSpec.md deleted file mode 100644 index 6e24755a14..0000000000 --- a/sdk/python/docs/KubeflowOrgV1PyTorchJobSpec.md +++ /dev/null @@ -1,14 +0,0 @@ -# KubeflowOrgV1PyTorchJobSpec - -PyTorchJobSpec is a desired state description of the PyTorchJob. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**elastic_policy** | [**KubeflowOrgV1ElasticPolicy**](KubeflowOrgV1ElasticPolicy.md) | | [optional] -**nproc_per_node** | **str** | Number of workers per node; supported values: [auto, cpu, gpu, int]. For more, https://github.com/pytorch/pytorch/blob/26f7f470df64d90e092081e39507e4ac751f55d6/torch/distributed/run.py#L629-L658. Defaults to auto. | [optional] -**pytorch_replica_specs** | [**dict(str, KubeflowOrgV1ReplicaSpec)**](KubeflowOrgV1ReplicaSpec.md) | A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. 
For example, { \"Master\": PyTorchReplicaSpec, \"Worker\": PyTorchReplicaSpec, } | -**run_policy** | [**KubeflowOrgV1RunPolicy**](KubeflowOrgV1RunPolicy.md) | | - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1RDZVConf.md b/sdk/python/docs/KubeflowOrgV1RDZVConf.md deleted file mode 100644 index 2dce54c9b3..0000000000 --- a/sdk/python/docs/KubeflowOrgV1RDZVConf.md +++ /dev/null @@ -1,11 +0,0 @@ -# KubeflowOrgV1RDZVConf - -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**key** | **str** | | [optional] -**value** | **str** | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1ReplicaSpec.md b/sdk/python/docs/KubeflowOrgV1ReplicaSpec.md deleted file mode 100644 index ed9c8968f5..0000000000 --- a/sdk/python/docs/KubeflowOrgV1ReplicaSpec.md +++ /dev/null @@ -1,13 +0,0 @@ -# KubeflowOrgV1ReplicaSpec - -ReplicaSpec is a description of the replica -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**replicas** | **int** | Replicas is the desired number of replicas of the given template. If unspecified, defaults to 1. | [optional] -**restart_policy** | **str** | Restart policy for all replicas within the job. One of Always, OnFailure, Never and ExitCode. Default to Never. 
| [optional] -**template** | [**V1PodTemplateSpec**](V1PodTemplateSpec.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1ReplicaStatus.md b/sdk/python/docs/KubeflowOrgV1ReplicaStatus.md deleted file mode 100644 index 33b96c3639..0000000000 --- a/sdk/python/docs/KubeflowOrgV1ReplicaStatus.md +++ /dev/null @@ -1,15 +0,0 @@ -# KubeflowOrgV1ReplicaStatus - -ReplicaStatus represents the current observed state of the replica. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**active** | **int** | The number of actively running pods. | [optional] -**failed** | **int** | The number of pods which reached phase Failed. | [optional] -**label_selector** | [**V1LabelSelector**](V1LabelSelector.md) | | [optional] -**selector** | **str** | A Selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty Selector matches all objects. A null Selector matches no objects. | [optional] -**succeeded** | **int** | The number of pods which reached phase Succeeded. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1RunPolicy.md b/sdk/python/docs/KubeflowOrgV1RunPolicy.md deleted file mode 100644 index d3b32c70c7..0000000000 --- a/sdk/python/docs/KubeflowOrgV1RunPolicy.md +++ /dev/null @@ -1,17 +0,0 @@ -# KubeflowOrgV1RunPolicy - -RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active. 
-## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**active_deadline_seconds** | **int** | Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer. | [optional] -**backoff_limit** | **int** | Optional number of retries before marking this job failed. | [optional] -**clean_pod_policy** | **str** | CleanPodPolicy defines the policy to kill pods after the job completes. Default to None. | [optional] -**managed_by** | **str** | ManagedBy is used to indicate the controller or entity that manages a job. The value must be either an empty, 'kubeflow.org/training-operator' or 'kueue.x-k8s.io/multikueue'. The training-operator reconciles a job which doesn't have this field at all or the field value is the reserved string 'kubeflow.org/training-operator', but delegates reconciling the job with 'kueue.x-k8s.io/multikueue' to the Kueue. The field is immutable. | [optional] -**scheduling_policy** | [**KubeflowOrgV1SchedulingPolicy**](KubeflowOrgV1SchedulingPolicy.md) | | [optional] -**suspend** | **bool** | suspend specifies whether the Job controller should create Pods or not. If a Job is created with suspend set to true, no Pods are created by the Job controller. If a Job is suspended after creation (i.e. the flag goes from false to true), the Job controller will delete all active Pods and PodGroups associated with this Job. Users must design their workload to gracefully handle this. Suspending a Job will reset the StartTime field of the Job. Defaults to false. | [optional] -**ttl_seconds_after_finished** | **int** | TTLSecondsAfterFinished is the TTL to clean up jobs. It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Default to infinite. 
| [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1SchedulingPolicy.md b/sdk/python/docs/KubeflowOrgV1SchedulingPolicy.md deleted file mode 100644 index ca2c5070cc..0000000000 --- a/sdk/python/docs/KubeflowOrgV1SchedulingPolicy.md +++ /dev/null @@ -1,15 +0,0 @@ -# KubeflowOrgV1SchedulingPolicy - -SchedulingPolicy encapsulates various scheduling policies of the distributed training job, for example `minAvailable` for gang-scheduling. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**min_available** | **int** | | [optional] -**min_resources** | [**dict(str, Quantity)**](Quantity.md) | | [optional] -**priority_class** | **str** | | [optional] -**queue** | **str** | | [optional] -**schedule_timeout_seconds** | **int** | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1TFJob.md b/sdk/python/docs/KubeflowOrgV1TFJob.md deleted file mode 100644 index 6e4389f046..0000000000 --- a/sdk/python/docs/KubeflowOrgV1TFJob.md +++ /dev/null @@ -1,15 +0,0 @@ -# KubeflowOrgV1TFJob - -TFJob represents a TFJob resource. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. 
Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] -**spec** | [**KubeflowOrgV1TFJobSpec**](KubeflowOrgV1TFJobSpec.md) | | [optional] -**status** | [**KubeflowOrgV1JobStatus**](KubeflowOrgV1JobStatus.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1TFJobList.md b/sdk/python/docs/KubeflowOrgV1TFJobList.md deleted file mode 100644 index 73023fc7f3..0000000000 --- a/sdk/python/docs/KubeflowOrgV1TFJobList.md +++ /dev/null @@ -1,14 +0,0 @@ -# KubeflowOrgV1TFJobList - -TFJobList is a list of TFJobs. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**items** | [**list[KubeflowOrgV1TFJob]**](KubeflowOrgV1TFJob.md) | List of TFJobs. | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1TFJobSpec.md b/sdk/python/docs/KubeflowOrgV1TFJobSpec.md deleted file mode 100644 index cbea624dec..0000000000 --- a/sdk/python/docs/KubeflowOrgV1TFJobSpec.md +++ /dev/null @@ -1,14 +0,0 @@ -# KubeflowOrgV1TFJobSpec - -TFJobSpec is a desired state description of the TFJob. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**enable_dynamic_worker** | **bool** | A switch to enable dynamic worker | [optional] -**run_policy** | [**KubeflowOrgV1RunPolicy**](KubeflowOrgV1RunPolicy.md) | | -**success_policy** | **str** | SuccessPolicy defines the policy to mark the TFJob as succeeded. Default to \"\", using the default rules. | [optional] -**tf_replica_specs** | [**dict(str, KubeflowOrgV1ReplicaSpec)**](KubeflowOrgV1ReplicaSpec.md) | A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. 
For example, { \"PS\": ReplicaSpec, \"Worker\": ReplicaSpec, } | - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1XGBoostJob.md b/sdk/python/docs/KubeflowOrgV1XGBoostJob.md deleted file mode 100644 index b1d433f033..0000000000 --- a/sdk/python/docs/KubeflowOrgV1XGBoostJob.md +++ /dev/null @@ -1,15 +0,0 @@ -# KubeflowOrgV1XGBoostJob - -XGBoostJob is the Schema for the xgboostjobs API -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] -**spec** | [**KubeflowOrgV1XGBoostJobSpec**](KubeflowOrgV1XGBoostJobSpec.md) | | [optional] -**status** | [**KubeflowOrgV1JobStatus**](KubeflowOrgV1JobStatus.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1XGBoostJobList.md b/sdk/python/docs/KubeflowOrgV1XGBoostJobList.md deleted file mode 100644 index 2821faacb8..0000000000 --- a/sdk/python/docs/KubeflowOrgV1XGBoostJobList.md +++ /dev/null @@ -1,14 +0,0 @@ -# KubeflowOrgV1XGBoostJobList - -XGBoostJobList contains a list of XGBoostJob -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**items** | [**list[KubeflowOrgV1XGBoostJob]**](KubeflowOrgV1XGBoostJob.md) | | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/KubeflowOrgV1XGBoostJobSpec.md b/sdk/python/docs/KubeflowOrgV1XGBoostJobSpec.md deleted file mode 100644 index e42d004652..0000000000 --- a/sdk/python/docs/KubeflowOrgV1XGBoostJobSpec.md +++ /dev/null @@ -1,12 +0,0 @@ -# KubeflowOrgV1XGBoostJobSpec - -XGBoostJobSpec defines the desired state of XGBoostJob -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**run_policy** | [**KubeflowOrgV1RunPolicy**](KubeflowOrgV1RunPolicy.md) | | -**xgb_replica_specs** | [**dict(str, KubeflowOrgV1ReplicaSpec)**](KubeflowOrgV1ReplicaSpec.md) | | - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/RuntimeTypeMeta.md b/sdk/python/docs/RuntimeTypeMeta.md deleted file mode 100644 index 4c975bd8cf..0000000000 --- a/sdk/python/docs/RuntimeTypeMeta.md +++ /dev/null @@ -1,12 +0,0 @@ -# RuntimeTypeMeta - -TypeMeta is shared by all top level objects. The proper way to use it is to inline it in your type, like this: type MyAwesomeAPIObject struct { runtime.TypeMeta `json:\",inline\"` ... // other fields } func (obj *MyAwesomeAPIObject) SetGroupVersionKind(gvk *metav1.GroupVersionKind) { metav1.UpdateTypeMeta(obj,gvk) }; GroupVersionKind() *GroupVersionKind TypeMeta is provided here for convenience. You may use it directly from this package or define your own with the same fields. 
-## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | | [optional] -**kind** | **str** | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/RuntimeUnknown.md b/sdk/python/docs/RuntimeUnknown.md deleted file mode 100644 index 2249381662..0000000000 --- a/sdk/python/docs/RuntimeUnknown.md +++ /dev/null @@ -1,14 +0,0 @@ -# RuntimeUnknown - -Unknown allows api objects with unknown types to be passed-through. This can be used to deal with the API objects from a plug-in. Unknown objects still have functioning TypeMeta features-- kind, version, etc. metadata and field mutatation. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**content_encoding** | **str** | ContentEncoding is encoding used to encode 'Raw' data. Unspecified means no encoding. | [default to ''] -**content_type** | **str** | ContentType is serialization method used to serialize 'Raw'. Unspecified means ContentTypeJSON. | [default to ''] -**api_version** | **str** | | [optional] -**kind** | **str** | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1APIGroup.md b/sdk/python/docs/V1APIGroup.md deleted file mode 100644 index 6dfeeb94a9..0000000000 --- a/sdk/python/docs/V1APIGroup.md +++ /dev/null @@ -1,16 +0,0 @@ -# V1APIGroup - -APIGroup contains the name, the supported versions, and the preferred version of a group. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**name** | **str** | name is the name of the group. | [default to ''] -**preferred_version** | [**V1GroupVersionForDiscovery**](V1GroupVersionForDiscovery.md) | | [optional] -**server_address_by_client_cid_rs** | [**list[V1ServerAddressByClientCIDR]**](V1ServerAddressByClientCIDR.md) | a map of client CIDR to server address that is serving this group. This is to help clients reach servers in the most network-efficient way possible. Clients can use the appropriate server address as per the CIDR that they match. In case of multiple matches, clients should use the longest matching CIDR. The server returns only those CIDRs that it thinks that the client can match. For example: the master will return an internal IP CIDR only, if the client reaches the server using an internal IP. Server looks at X-Forwarded-For header or X-Real-Ip header or request.RemoteAddr (in that order) to get the client IP. | [optional] -**versions** | [**list[V1GroupVersionForDiscovery]**](V1GroupVersionForDiscovery.md) | versions are the versions supported in this group. 
| - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1APIGroupList.md b/sdk/python/docs/V1APIGroupList.md deleted file mode 100644 index 6cbd031dfd..0000000000 --- a/sdk/python/docs/V1APIGroupList.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1APIGroupList - -APIGroupList is a list of APIGroup, to allow clients to discover the API at /apis. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**groups** | [**list[V1APIGroup]**](V1APIGroup.md) | groups is a list of APIGroup. | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1APIResource.md b/sdk/python/docs/V1APIResource.md deleted file mode 100644 index 091a797fc0..0000000000 --- a/sdk/python/docs/V1APIResource.md +++ /dev/null @@ -1,20 +0,0 @@ -# V1APIResource - -APIResource specifies the name of a resource and whether it is namespaced. 
-## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**categories** | **list[str]** | categories is a list of the grouped resources this resource belongs to (e.g. 'all') | [optional] -**group** | **str** | group is the preferred group of the resource. Empty implies the group of the containing resource list. For subresources, this may have a different value, for example: Scale\". | [optional] -**kind** | **str** | kind is the kind for the resource (e.g. 'Foo' is the kind for a resource 'foo') | [default to ''] -**name** | **str** | name is the plural name of the resource. | [default to ''] -**namespaced** | **bool** | namespaced indicates if a resource is namespaced or not. | [default to False] -**short_names** | **list[str]** | shortNames is a list of suggested short names of the resource. | [optional] -**singular_name** | **str** | singularName is the singular name of the resource. This allows clients to handle plural and singular opaquely. The singularName is more correct for reporting status on a single item and both singular and plural are allowed from the kubectl CLI interface. | [default to ''] -**storage_version_hash** | **str** | The hash value of the storage version, the version this resource is converted to when written to the data store. Value must be treated as opaque by clients. Only equality comparison on the value is valid. This is an alpha feature and may change or be removed in the future. The field is populated by the apiserver only if the StorageVersionHash feature gate is enabled. This field will remain optional even if it graduates. | [optional] -**verbs** | **list[str]** | verbs is a list of supported kube verbs (this includes get, list, watch, create, update, patch, delete, deletecollection, and proxy) | -**version** | **str** | version is the preferred version of the resource. 
Empty implies the version of the containing resource list For subresources, this may have a different value, for example: v1 (while inside a v1beta1 version of the core resource's group)\". | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1APIResourceList.md b/sdk/python/docs/V1APIResourceList.md deleted file mode 100644 index 135b87326f..0000000000 --- a/sdk/python/docs/V1APIResourceList.md +++ /dev/null @@ -1,14 +0,0 @@ -# V1APIResourceList - -APIResourceList is a list of APIResource, it is used to expose the name of the resources supported in a specific group and version, and if the resource is namespaced. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**group_version** | **str** | groupVersion is the group and version this APIResourceList is for. | [default to ''] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**resources** | [**list[V1APIResource]**](V1APIResource.md) | resources contains the name of the resources and if they are namespaced. 
| - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1APIVersions.md b/sdk/python/docs/V1APIVersions.md deleted file mode 100644 index f4502839ca..0000000000 --- a/sdk/python/docs/V1APIVersions.md +++ /dev/null @@ -1,14 +0,0 @@ -# V1APIVersions - -APIVersions lists the versions that are available, to allow clients to discover the API at /api, which is the root path of the legacy v1 API. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**server_address_by_client_cid_rs** | [**list[V1ServerAddressByClientCIDR]**](V1ServerAddressByClientCIDR.md) | a map of client CIDR to server address that is serving this group. This is to help clients reach servers in the most network-efficient way possible. Clients can use the appropriate server address as per the CIDR that they match. In case of multiple matches, clients should use the longest matching CIDR. The server returns only those CIDRs that it thinks that the client can match. For example: the master will return an internal IP CIDR only, if the client reaches the server using an internal IP. 
Server looks at X-Forwarded-For header or X-Real-Ip header or request.RemoteAddr (in that order) to get the client IP. | -**versions** | **list[str]** | versions are the api versions that are available. | - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1ApplyOptions.md b/sdk/python/docs/V1ApplyOptions.md deleted file mode 100644 index 9637344351..0000000000 --- a/sdk/python/docs/V1ApplyOptions.md +++ /dev/null @@ -1,15 +0,0 @@ -# V1ApplyOptions - -ApplyOptions may be provided when applying an API object. FieldManager is required for apply requests. ApplyOptions is equivalent to PatchOptions. It is provided as a convenience with documentation that speaks specifically to how the options fields relate to apply. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**dry_run** | **list[str]** | When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed | [optional] -**field_manager** | **str** | fieldManager is a name associated with the actor or entity that is making these changes. The value must be less than or 128 characters long, and only contain printable characters, as defined by https://golang.org/pkg/unicode/#IsPrint. This field is required. | [default to ''] -**force** | **bool** | Force is going to \"force\" Apply requests. 
It means user will re-acquire conflicting fields owned by other people. | [default to False] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1Condition.md b/sdk/python/docs/V1Condition.md deleted file mode 100644 index e71828c9e8..0000000000 --- a/sdk/python/docs/V1Condition.md +++ /dev/null @@ -1,16 +0,0 @@ -# V1Condition - -Condition contains details for one aspect of the current state of this API Resource. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**last_transition_time** | **datetime** | Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. Wrappers are provided for many of the factory methods that the time package offers. | -**message** | **str** | message is a human readable message indicating details about the transition. This may be an empty string. | [default to ''] -**observed_generation** | **int** | observedGeneration represents the .metadata.generation that the condition was set based upon. For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date with respect to the current state of the instance. | [optional] -**reason** | **str** | reason contains a programmatic identifier indicating the reason for the condition's last transition. Producers of specific condition types may define expected values and meanings for this field, and whether the values are considered a guaranteed API. 
The value should be a CamelCase string. This field may not be empty. | [default to ''] -**status** | **str** | status of the condition, one of True, False, Unknown. | [default to ''] -**type** | **str** | type of condition in CamelCase or in foo.example.com/CamelCase. | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1CreateOptions.md b/sdk/python/docs/V1CreateOptions.md deleted file mode 100644 index eedf5ed420..0000000000 --- a/sdk/python/docs/V1CreateOptions.md +++ /dev/null @@ -1,15 +0,0 @@ -# V1CreateOptions - -CreateOptions may be provided when creating an API object. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**dry_run** | **list[str]** | When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed | [optional] -**field_manager** | **str** | fieldManager is a name associated with the actor or entity that is making these changes. The value must be less than or 128 characters long, and only contain printable characters, as defined by https://golang.org/pkg/unicode/#IsPrint. | [optional] -**field_validation** | **str** | fieldValidation instructs the server on how to handle objects in the request (POST/PUT/PATCH) containing unknown or duplicate fields. 
Valid values are: - Ignore: This will ignore any unknown fields that are silently dropped from the object, and will ignore all but the last duplicate field that the decoder encounters. This is the default behavior prior to v1.23. - Warn: This will send a warning via the standard warning response header for each unknown field that is dropped from the object, and for each duplicate field that is encountered. The request will still succeed if there are no other errors, and will only persist the last of any duplicate fields. This is the default in v1.23+ - Strict: This will fail the request with a BadRequest error if any unknown fields would be dropped from the object, or if any duplicate fields are present. The error returned from the server will contain all unknown and duplicate fields encountered. | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1DeleteOptions.md b/sdk/python/docs/V1DeleteOptions.md deleted file mode 100644 index 04baafad0a..0000000000 --- a/sdk/python/docs/V1DeleteOptions.md +++ /dev/null @@ -1,17 +0,0 @@ -# V1DeleteOptions - -DeleteOptions may be provided when deleting an API object. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**dry_run** | **list[str]** | When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed | [optional] -**grace_period_seconds** | **int** | The duration in seconds before the object should be deleted. Value must be non-negative integer. The value zero indicates delete immediately. If this value is nil, the default grace period for the specified type will be used. Defaults to a per object value if not specified. zero means delete immediately. | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**orphan_dependents** | **bool** | Deprecated: please use the PropagationPolicy, this field will be deprecated in 1.7. Should the dependent objects be orphaned. If true/false, the \"orphan\" finalizer will be added to/removed from the object's finalizers list. Either this field or PropagationPolicy may be set, but not both. | [optional] -**preconditions** | [**V1Preconditions**](V1Preconditions.md) | | [optional] -**propagation_policy** | **str** | Whether and how garbage collection will be performed. Either this field or OrphanDependents may be set, but not both. The default policy is decided by the existing finalizer set in the metadata.finalizers and the resource-specific default policy. 
Acceptable values are: 'Orphan' - orphan the dependents; 'Background' - allow the garbage collector to delete the dependents in the background; 'Foreground' - a cascading policy that deletes all dependents in the foreground. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1FieldSelectorRequirement.md b/sdk/python/docs/V1FieldSelectorRequirement.md deleted file mode 100644 index cecf603be6..0000000000 --- a/sdk/python/docs/V1FieldSelectorRequirement.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1FieldSelectorRequirement - -FieldSelectorRequirement is a selector that contains values, a key, and an operator that relates the key and values. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**key** | **str** | key is the field selector key that the requirement applies to. | [default to ''] -**operator** | **str** | operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists, DoesNotExist. The list of operators may grow in the future. | [default to ''] -**values** | **list[str]** | values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1GetOptions.md b/sdk/python/docs/V1GetOptions.md deleted file mode 100644 index ea380ebac5..0000000000 --- a/sdk/python/docs/V1GetOptions.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1GetOptions - -GetOptions is the standard query options to the standard REST get call. 
-## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**resource_version** | **str** | resourceVersion sets a constraint on what resource versions a request may be served from. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1GroupKind.md b/sdk/python/docs/V1GroupKind.md deleted file mode 100644 index 992668f005..0000000000 --- a/sdk/python/docs/V1GroupKind.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1GroupKind - -GroupKind specifies a Group and a Kind, but does not force a version. 
This is useful for identifying concepts during lookup stages without having partially valid types -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**group** | **str** | | [default to ''] -**kind** | **str** | | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1GroupResource.md b/sdk/python/docs/V1GroupResource.md deleted file mode 100644 index f89d5b62a3..0000000000 --- a/sdk/python/docs/V1GroupResource.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1GroupResource - -GroupResource specifies a Group and a Resource, but does not force a version. This is useful for identifying concepts during lookup stages without having partially valid types -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**group** | **str** | | [default to ''] -**resource** | **str** | | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1GroupVersion.md b/sdk/python/docs/V1GroupVersion.md deleted file mode 100644 index 57645af705..0000000000 --- a/sdk/python/docs/V1GroupVersion.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1GroupVersion - -GroupVersion contains the \"group\" and the \"version\", which uniquely identifies the API. 
-## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**group** | **str** | | [default to ''] -**version** | **str** | | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1GroupVersionForDiscovery.md b/sdk/python/docs/V1GroupVersionForDiscovery.md deleted file mode 100644 index b1592b33b5..0000000000 --- a/sdk/python/docs/V1GroupVersionForDiscovery.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1GroupVersionForDiscovery - -GroupVersion contains the \"group/version\" and \"version\" string of a version. It is made a struct to keep extensibility. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**group_version** | **str** | groupVersion specifies the API group and version in the form \"group/version\" | [default to ''] -**version** | **str** | version specifies the version in the form of \"version\". This is to save the clients the trouble of splitting the GroupVersion. | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1GroupVersionKind.md b/sdk/python/docs/V1GroupVersionKind.md deleted file mode 100644 index a1bb411108..0000000000 --- a/sdk/python/docs/V1GroupVersionKind.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1GroupVersionKind - -GroupVersionKind unambiguously identifies a kind. It doesn't anonymously include GroupVersion to avoid automatic coercion. 
It doesn't use a GroupVersion to avoid custom marshalling -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**group** | **str** | | [default to ''] -**kind** | **str** | | [default to ''] -**version** | **str** | | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1GroupVersionResource.md b/sdk/python/docs/V1GroupVersionResource.md deleted file mode 100644 index ad553a2c16..0000000000 --- a/sdk/python/docs/V1GroupVersionResource.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1GroupVersionResource - -GroupVersionResource unambiguously identifies a resource. It doesn't anonymously include GroupVersion to avoid automatic coercion. It doesn't use a GroupVersion to avoid custom marshalling -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**group** | **str** | | [default to ''] -**resource** | **str** | | [default to ''] -**version** | **str** | | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1InternalEvent.md b/sdk/python/docs/V1InternalEvent.md deleted file mode 100644 index 1ebffc9c52..0000000000 --- a/sdk/python/docs/V1InternalEvent.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1InternalEvent - -InternalEvent makes watch.Event versioned -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**object** | [**RuntimeObject**](RuntimeObject.md) | | -**type** | **str** | | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git 
a/sdk/python/docs/V1LabelSelector.md b/sdk/python/docs/V1LabelSelector.md deleted file mode 100644 index 64515ca74d..0000000000 --- a/sdk/python/docs/V1LabelSelector.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1LabelSelector - -A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**match_expressions** | [**list[V1LabelSelectorRequirement]**](V1LabelSelectorRequirement.md) | matchExpressions is a list of label selector requirements. The requirements are ANDed. | [optional] -**match_labels** | **dict(str, str)** | matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is \"key\", the operator is \"In\", and the values array contains only \"value\". The requirements are ANDed. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1LabelSelectorRequirement.md b/sdk/python/docs/V1LabelSelectorRequirement.md deleted file mode 100644 index 547d85e568..0000000000 --- a/sdk/python/docs/V1LabelSelectorRequirement.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1LabelSelectorRequirement - -A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**key** | **str** | key is the label key that the selector applies to. | [default to ''] -**operator** | **str** | operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. 
| [default to ''] -**values** | **list[str]** | values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1List.md b/sdk/python/docs/V1List.md deleted file mode 100644 index dabe02e529..0000000000 --- a/sdk/python/docs/V1List.md +++ /dev/null @@ -1,14 +0,0 @@ -# V1List - -List holds a list of objects, which may not be known by the server. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**items** | **list[object]** | List of objects | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1ListMeta.md b/sdk/python/docs/V1ListMeta.md deleted file mode 100644 index 8c27e3ddc0..0000000000 --- a/sdk/python/docs/V1ListMeta.md +++ /dev/null @@ -1,14 +0,0 @@ -# V1ListMeta - -ListMeta describes metadata that synthetic resources must have, including lists and various status objects. A resource may have only one of {ObjectMeta, ListMeta}. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**_continue** | **str** | continue may be set if the user set a limit on the number of items returned, and indicates that the server has more data available. The value is opaque and may be used to issue another request to the endpoint that served this list to retrieve the next set of available objects. Continuing a consistent list may not be possible if the server configuration has changed or more than a few minutes have passed. The resourceVersion field returned when using this continue value will be identical to the value in the first response, unless you have received this token from an error message. | [optional] -**remaining_item_count** | **int** | remainingItemCount is the number of subsequent items in the list which are not included in this list response. If the list request contained label or field selectors, then the number of remaining items is unknown and the field will be left unset and omitted during serialization. If the list is complete (either because it is not chunking or because this is the last chunk), then there are no more remaining items and this field will be left unset and omitted during serialization. 
Servers older than v1.15 do not set this field. The intended use of the remainingItemCount is *estimating* the size of a collection. Clients should not rely on the remainingItemCount to be set or to be exact. | [optional] -**resource_version** | **str** | String that identifies the server's internal version of this object that can be used by clients to determine when objects have changed. Value must be treated as opaque by clients and passed unmodified back to the server. Populated by the system. Read-only. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency | [optional] -**self_link** | **str** | Deprecated: selfLink is a legacy read-only field that is no longer populated by the system. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1ListOptions.md b/sdk/python/docs/V1ListOptions.md deleted file mode 100644 index 8e5dbf95f3..0000000000 --- a/sdk/python/docs/V1ListOptions.md +++ /dev/null @@ -1,22 +0,0 @@ -# V1ListOptions - -ListOptions is the query options to a standard REST list call. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**allow_watch_bookmarks** | **bool** | allowWatchBookmarks requests watch events with type \"BOOKMARK\". Servers that do not implement bookmarks may ignore this flag and bookmarks are sent at the server's discretion. Clients should not assume bookmarks are returned at any specific interval, nor may they assume the server will send any BOOKMARK event during a session. If this is not a watch, this field is ignored. | [optional] -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**_continue** | **str** | The continue option should be set when retrieving more results from the server. Since this value is server defined, clients may only use the continue value from a previous query result with identical query parameters (except for the value of continue) and the server may reject a continue value it does not recognize. If the specified continue value is no longer valid whether due to expiration (generally five to fifteen minutes) or a configuration change on the server, the server will respond with a 410 ResourceExpired error together with a continue token. If the client needs a consistent list, it must restart their list without the continue field. Otherwise, the client may send another list request with the token received with the 410 error, the server will respond with a list starting from the next key, but from the latest snapshot, which is inconsistent from the previous list results - objects that are created, modified, or deleted after the first list request will be included in the response, as long as their keys are after the \"next key\". This field is not supported when watch is true. Clients may start a watch from the last resourceVersion value returned by the server and not miss any modifications. | [optional] -**field_selector** | **str** | A selector to restrict the list of returned objects by their fields. Defaults to everything. | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**label_selector** | **str** | A selector to restrict the list of returned objects by their labels. Defaults to everything. | [optional] -**limit** | **int** | limit is a maximum number of responses to return for a list call. If more items exist, the server will set the `continue` field on the list metadata to a value that can be used with the same initial query to retrieve the next set of results. Setting a limit may return fewer than the requested amount of items (up to zero items) in the event all requested objects are filtered out and clients should only use the presence of the continue field to determine whether more results are available. Servers may choose not to support the limit argument and will return all of the available results. If limit is specified and the continue field is empty, clients may assume that no more results are available. This field is not supported if watch is true. The server guarantees that the objects returned when using continue will be identical to issuing a single list call without a limit - that is, no objects created, modified, or deleted after the first request is issued will be included in any subsequent continued requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects are updated during a chunked list the version of the object that was present at the time the first list result was calculated is returned. | [optional] -**resource_version** | **str** | resourceVersion sets a constraint on what resource versions a request may be served from. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. 
Defaults to unset | [optional] -**resource_version_match** | **str** | resourceVersionMatch determines how resourceVersion is applied to list calls. It is highly recommended that resourceVersionMatch be set for list calls where resourceVersion is set See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset | [optional] -**send_initial_events** | **bool** | `sendInitialEvents=true` may be set together with `watch=true`. In that case, the watch stream will begin with synthetic events to produce the current state of objects in the collection. Once all such events have been sent, a synthetic \"Bookmark\" event will be sent. The bookmark will report the ResourceVersion (RV) corresponding to the set of objects, and be marked with `\"k8s.io/initial-events-end\": \"true\"` annotation. Afterwards, the watch stream will proceed as usual, sending watch events corresponding to changes (subsequent to the RV) to objects watched. When `sendInitialEvents` option is set, we require `resourceVersionMatch` option to also be set. The semantic of the watch request is as following: - `resourceVersionMatch` = NotOlderThan is interpreted as \"data at least as new as the provided `resourceVersion`\" and the bookmark event is send when the state is synced to a `resourceVersion` at least as fresh as the one provided by the ListOptions. If `resourceVersion` is unset, this is interpreted as \"consistent read\" and the bookmark event is send when the state is synced at least to the moment when request started being processed. - `resourceVersionMatch` set to any other value or unset Invalid error is returned. Defaults to true if `resourceVersion=\"\"` or `resourceVersion=\"0\"` (for backward compatibility reasons) and to false otherwise. | [optional] -**timeout_seconds** | **int** | Timeout for the list/watch call. This limits the duration of the call, regardless of any activity or inactivity. 
| [optional] -**watch** | **bool** | Watch for changes to the described resources and return them as a stream of add, update, and remove notifications. Specify resourceVersion. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1ManagedFieldsEntry.md b/sdk/python/docs/V1ManagedFieldsEntry.md deleted file mode 100644 index 169a5f4cb5..0000000000 --- a/sdk/python/docs/V1ManagedFieldsEntry.md +++ /dev/null @@ -1,17 +0,0 @@ -# V1ManagedFieldsEntry - -ManagedFieldsEntry is a workflow-id, a FieldSet and the group version of the resource that the fieldset applies to. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the version of this resource that this field set applies to. The format is \"group/version\" just like the top-level APIVersion field. It is necessary to track the version of a field set because it cannot be automatically converted. | [optional] -**fields_type** | **str** | FieldsType is the discriminator for the different fields format and version. There is currently only one possible value: \"FieldsV1\" | [optional] -**fields_v1** | [**object**](.md) | FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format. Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. 
The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set. The exact format is defined in sigs.k8s.io/structured-merge-diff | [optional] -**manager** | **str** | Manager is an identifier of the workflow managing these fields. | [optional] -**operation** | **str** | Operation is the type of operation which lead to this ManagedFieldsEntry being created. The only valid values for this field are 'Apply' and 'Update'. | [optional] -**subresource** | **str** | Subresource is the name of the subresource used to update that object, or empty string if the object was updated through the main resource. The value of this field is used to distinguish between managers, even if they share the same name. For example, a status update will be distinct from a regular update using the same manager name. Note that the APIVersion field is not related to the Subresource field and it always corresponds to the version of the main resource. | [optional] -**time** | **datetime** | Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. Wrappers are provided for many of the factory methods that the time package offers. 
| [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1ObjectMeta.md b/sdk/python/docs/V1ObjectMeta.md deleted file mode 100644 index 9c23c5272e..0000000000 --- a/sdk/python/docs/V1ObjectMeta.md +++ /dev/null @@ -1,25 +0,0 @@ -# V1ObjectMeta - -ObjectMeta is metadata that all persisted resources must have, which includes all objects users must create. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**annotations** | **dict(str, str)** | Annotations is an unstructured key value map stored with a resource that may be set by external tools to store and retrieve arbitrary metadata. They are not queryable and should be preserved when modifying objects. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations | [optional] -**creation_timestamp** | **datetime** | Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. Wrappers are provided for many of the factory methods that the time package offers. | [optional] -**deletion_grace_period_seconds** | **int** | Number of seconds allowed for this object to gracefully terminate before it will be removed from the system. Only set when deletionTimestamp is also set. May only be shortened. Read-only. | [optional] -**deletion_timestamp** | **datetime** | Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. Wrappers are provided for many of the factory methods that the time package offers. | [optional] -**finalizers** | **list[str]** | Must be empty before the object is deleted from the registry. Each entry is an identifier for the responsible component that will remove the entry from the list. If the deletionTimestamp of the object is non-nil, entries in this list can only be removed. 
Finalizers may be processed and removed in any order. Order is NOT enforced because it introduces significant risk of stuck finalizers. finalizers is a shared field, any actor with permission can reorder it. If the finalizer list is processed in order, then this can lead to a situation in which the component responsible for the first finalizer in the list is waiting for a signal (field value, external system, or other) produced by a component responsible for a finalizer later in the list, resulting in a deadlock. Without enforced ordering finalizers are free to order amongst themselves and are not vulnerable to ordering changes in the list. | [optional] -**generate_name** | **str** | GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. If this field is specified and the generated name exists, the server will return a 409. Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency | [optional] -**generation** | **int** | A sequence number representing a specific generation of the desired state. Populated by the system. Read-only. | [optional] -**labels** | **dict(str, str)** | Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match selectors of replication controllers and services. 
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels | [optional] -**managed_fields** | [**list[V1ManagedFieldsEntry]**](V1ManagedFieldsEntry.md) | ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like \"ci-cd\". The set of fields is always in the version that the workflow used when modifying the object. | [optional] -**name** | **str** | Name must be unique within a namespace. Is required when creating resources, although some resources may allow a client to request the generation of an appropriate name automatically. Name is primarily intended for creation idempotence and configuration definition. Cannot be updated. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names#names | [optional] -**namespace** | **str** | Namespace defines the space within which each name must be unique. An empty namespace is equivalent to the \"default\" namespace, but \"default\" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. Must be a DNS_LABEL. Cannot be updated. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces | [optional] -**owner_references** | [**list[V1OwnerReference]**](V1OwnerReference.md) | List of objects depended by this object. If ALL objects in the list have been deleted, this object will be garbage collected. If this object is managed by a controller, then an entry in this list will point to this controller, with the controller field set to true. There cannot be more than one managing controller. 
| [optional] -**resource_version** | **str** | An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency | [optional] -**self_link** | **str** | Deprecated: selfLink is a legacy read-only field that is no longer populated by the system. | [optional] -**uid** | **str** | UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. Populated by the system. Read-only. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names#uids | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1OwnerReference.md b/sdk/python/docs/V1OwnerReference.md deleted file mode 100644 index 5ead5a960d..0000000000 --- a/sdk/python/docs/V1OwnerReference.md +++ /dev/null @@ -1,16 +0,0 @@ -# V1OwnerReference - -OwnerReference contains enough information to let you identify an owning object. An owning object must be in the same namespace as the dependent, or be cluster-scoped, so there is no namespace field. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | API version of the referent. 
| [default to ''] -**block_owner_deletion** | **bool** | If true, AND if the owner has the \"foregroundDeletion\" finalizer, then the owner cannot be deleted from the key-value store until this reference is removed. See https://kubernetes.io/docs/concepts/architecture/garbage-collection/#foreground-deletion for how the garbage collector interacts with this field and enforces the foreground deletion. Defaults to false. To set this field, a user needs \"delete\" permission of the owner, otherwise 422 (Unprocessable Entity) will be returned. | [optional] -**controller** | **bool** | If true, this reference points to the managing controller. | [optional] -**kind** | **str** | Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [default to ''] -**name** | **str** | Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names#names | [default to ''] -**uid** | **str** | UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names#uids | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1PartialObjectMetadata.md b/sdk/python/docs/V1PartialObjectMetadata.md deleted file mode 100644 index 68ef1281cd..0000000000 --- a/sdk/python/docs/V1PartialObjectMetadata.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1PartialObjectMetadata - -PartialObjectMetadata is a generic representation of any object with ObjectMeta. It allows clients to get access to a particular ObjectMeta schema without knowing the details of the version. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1PartialObjectMetadataList.md b/sdk/python/docs/V1PartialObjectMetadataList.md deleted file mode 100644 index b1813a2ce6..0000000000 --- a/sdk/python/docs/V1PartialObjectMetadataList.md +++ /dev/null @@ -1,14 +0,0 @@ -# V1PartialObjectMetadataList - -PartialObjectMetadataList contains a list of objects containing only their metadata -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**items** | [**list[V1PartialObjectMetadata]**](V1PartialObjectMetadata.md) | items contains each of the included items. | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1PatchOptions.md b/sdk/python/docs/V1PatchOptions.md deleted file mode 100644 index 22649242aa..0000000000 --- a/sdk/python/docs/V1PatchOptions.md +++ /dev/null @@ -1,16 +0,0 @@ -# V1PatchOptions - -PatchOptions may be provided when patching an API object. PatchOptions is meant to be a superset of UpdateOptions. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**dry_run** | **list[str]** | When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed | [optional] -**field_manager** | **str** | fieldManager is a name associated with the actor or entity that is making these changes. The value must be less than or 128 characters long, and only contain printable characters, as defined by https://golang.org/pkg/unicode/#IsPrint. This field is required for apply requests (application/apply-patch) but optional for non-apply patch types (JsonPatch, MergePatch, StrategicMergePatch). 
| [optional] -**field_validation** | **str** | fieldValidation instructs the server on how to handle objects in the request (POST/PUT/PATCH) containing unknown or duplicate fields. Valid values are: - Ignore: This will ignore any unknown fields that are silently dropped from the object, and will ignore all but the last duplicate field that the decoder encounters. This is the default behavior prior to v1.23. - Warn: This will send a warning via the standard warning response header for each unknown field that is dropped from the object, and for each duplicate field that is encountered. The request will still succeed if there are no other errors, and will only persist the last of any duplicate fields. This is the default in v1.23+ - Strict: This will fail the request with a BadRequest error if any unknown fields would be dropped from the object, or if any duplicate fields are present. The error returned from the server will contain all unknown and duplicate fields encountered. | [optional] -**force** | **bool** | Force is going to \"force\" Apply requests. It means user will re-acquire conflicting fields owned by other people. Force flag must be unset for non-apply patch requests. | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1Preconditions.md b/sdk/python/docs/V1Preconditions.md deleted file mode 100644 index 2762345f38..0000000000 --- a/sdk/python/docs/V1Preconditions.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1Preconditions - -Preconditions must be fulfilled before an operation (update, delete, etc.) is carried out. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**resource_version** | **str** | Specifies the target ResourceVersion | [optional] -**uid** | **str** | Specifies the target UID. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1RootPaths.md b/sdk/python/docs/V1RootPaths.md deleted file mode 100644 index 422830b289..0000000000 --- a/sdk/python/docs/V1RootPaths.md +++ /dev/null @@ -1,11 +0,0 @@ -# V1RootPaths - -RootPaths lists the paths available at root. For example: \"/healthz\", \"/apis\". -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**paths** | **list[str]** | paths are the paths available at root. 
| - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1ServerAddressByClientCIDR.md b/sdk/python/docs/V1ServerAddressByClientCIDR.md deleted file mode 100644 index 76855a62cf..0000000000 --- a/sdk/python/docs/V1ServerAddressByClientCIDR.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1ServerAddressByClientCIDR - -ServerAddressByClientCIDR helps the client to determine the server address that they should use, depending on the clientCIDR that they match. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**client_cidr** | **str** | The CIDR with which clients can match their IP to figure out the server address that they should use. | [default to ''] -**server_address** | **str** | Address of this server, suitable for a client that matches the above CIDR. This can be a hostname, hostname:port, IP or IP:port. | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1Status.md b/sdk/python/docs/V1Status.md deleted file mode 100644 index 86124ab428..0000000000 --- a/sdk/python/docs/V1Status.md +++ /dev/null @@ -1,18 +0,0 @@ -# V1Status - -Status is a return value for calls that don't return other objects. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**code** | **int** | Suggested HTTP return code for this status, 0 if not set. 
| [optional] -**details** | [**V1StatusDetails**](V1StatusDetails.md) | | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**message** | **str** | A human-readable description of the status of this operation. | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] -**reason** | **str** | A machine-readable description of why this operation is in the \"Failure\" status. If this value is empty there is no information available. A Reason clarifies an HTTP status code but does not override it. | [optional] -**status** | **str** | Status of the operation. One of: \"Success\" or \"Failure\". More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1StatusCause.md b/sdk/python/docs/V1StatusCause.md deleted file mode 100644 index 82cd99431c..0000000000 --- a/sdk/python/docs/V1StatusCause.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1StatusCause - -StatusCause provides more information about an api.Status failure, including cases when multiple errors are encountered. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**field** | **str** | The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. 
Examples: \"name\" - the field \"name\" on the current resource \"items[0].name\" - the field \"name\" on the first array entry in \"items\" | [optional] -**message** | **str** | A human-readable description of the cause of the error. This field may be presented as-is to a reader. | [optional] -**reason** | **str** | A machine-readable description of the cause of the error. If this value is empty there is no information available. | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1StatusDetails.md b/sdk/python/docs/V1StatusDetails.md deleted file mode 100644 index 190933a56a..0000000000 --- a/sdk/python/docs/V1StatusDetails.md +++ /dev/null @@ -1,16 +0,0 @@ -# V1StatusDetails - -StatusDetails is a set of additional properties that MAY be set by the server to provide additional information about a response. The Reason field of a Status object defines what attributes will be set. Clients must ignore fields that do not match the defined type of each attribute, and should assume that any attribute may be empty, invalid, or under defined. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**causes** | [**list[V1StatusCause]**](V1StatusCause.md) | The Causes array includes more details associated with the StatusReason failure. Not all StatusReasons may provide detailed causes. | [optional] -**group** | **str** | The group attribute of the resource associated with the status StatusReason. | [optional] -**kind** | **str** | The kind attribute of the resource associated with the status StatusReason. On some operations may differ from the requested resource Kind. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**name** | **str** | The name attribute of the resource associated with the status StatusReason (when there is a single name which can be described). | [optional] -**retry_after_seconds** | **int** | If specified, the time in seconds before the operation should be retried. Some errors may indicate the client must take an alternate action - for those errors this field may indicate how long to wait before taking the alternate action. | [optional] -**uid** | **str** | UID of the resource. (when there is a single resource which can be described). More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names#uids | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1Table.md b/sdk/python/docs/V1Table.md deleted file mode 100644 index fd770c8456..0000000000 --- a/sdk/python/docs/V1Table.md +++ /dev/null @@ -1,15 +0,0 @@ -# V1Table - -Table is a tabular representation of a set of API resources. The server transforms the object into a set of preferred columns for quickly reviewing the objects. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**column_definitions** | [**list[V1TableColumnDefinition]**](V1TableColumnDefinition.md) | columnDefinitions describes each column in the returned items array. The number of cells per row will always match the number of column definitions. 
| -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] -**rows** | [**list[V1TableRow]**](V1TableRow.md) | rows is the list of items in the table. | - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1TableColumnDefinition.md b/sdk/python/docs/V1TableColumnDefinition.md deleted file mode 100644 index e2d9e0c901..0000000000 --- a/sdk/python/docs/V1TableColumnDefinition.md +++ /dev/null @@ -1,15 +0,0 @@ -# V1TableColumnDefinition - -TableColumnDefinition contains information about a column returned in the Table. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**description** | **str** | description is a human readable description of this column. | [default to ''] -**format** | **str** | format is an optional OpenAPI type modifier for this column. A format modifies the type and imposes additional rules, like date or time formatting for a string. The 'name' format is applied to the primary identifier column which has type 'string' to assist in clients identifying column is the resource name. See https://github.com/OAI/OpenAPI-Specification/blob/master/versions/2.0.md#data-types for more. | [default to ''] -**name** | **str** | name is a human readable name for the column. | [default to ''] -**priority** | **int** | priority is an integer defining the relative importance of this column compared to others. Lower numbers are considered higher priority. 
Columns that may be omitted in limited space scenarios should be given a higher priority. | [default to 0] -**type** | **str** | type is an OpenAPI type definition for this column, such as number, integer, string, or array. See https://github.com/OAI/OpenAPI-Specification/blob/master/versions/2.0.md#data-types for more. | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1TableOptions.md b/sdk/python/docs/V1TableOptions.md deleted file mode 100644 index c92b5827cd..0000000000 --- a/sdk/python/docs/V1TableOptions.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1TableOptions - -TableOptions are used when a Table is requested by the caller. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**include_object** | **str** | includeObject decides whether to include each object along with its columnar information. Specifying \"None\" will return no object, specifying \"Object\" will return the full object contents, and specifying \"Metadata\" (the default) will return the object's metadata in the PartialObjectMetadata kind in version v1beta1 of the meta.k8s.io API group. | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1TableRow.md b/sdk/python/docs/V1TableRow.md deleted file mode 100644 index 11f009744e..0000000000 --- a/sdk/python/docs/V1TableRow.md +++ /dev/null @@ -1,13 +0,0 @@ -# V1TableRow - -TableRow is an individual row in a table. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**cells** | **list[object]** | cells will be as wide as the column definitions array and may contain strings, numbers (float64 or int64), booleans, simple maps, lists, or null. See the type field of the column definition for a more detailed description. | -**conditions** | [**list[V1TableRowCondition]**](V1TableRowCondition.md) | conditions describe additional status of a row that are relevant for a human user. These conditions apply to the row, not to the object, and will be specific to table output. The only defined condition type is 'Completed', for a row that indicates a resource that has run to completion and can be given less visual priority. | [optional] -**object** | [**object**](.md) | RawExtension is used to hold extensions in external versions. To use this, make a field which has RawExtension as its type in your external, versioned struct, and Object in your internal struct. You also need to register your various plugin types. 
// Internal package: type MyAPIObject struct { runtime.TypeMeta `json:\",inline\"` MyPlugin runtime.Object `json:\"myPlugin\"` } type PluginA struct { AOption string `json:\"aOption\"` } // External package: type MyAPIObject struct { runtime.TypeMeta `json:\",inline\"` MyPlugin runtime.RawExtension `json:\"myPlugin\"` } type PluginA struct { AOption string `json:\"aOption\"` } // On the wire, the JSON will look something like this: { \"kind\":\"MyAPIObject\", \"apiVersion\":\"v1\", \"myPlugin\": { \"kind\":\"PluginA\", \"aOption\":\"foo\", }, } So what happens? Decode first uses json or yaml to unmarshal the serialized data into your external MyAPIObject. That causes the raw JSON to be stored, but not unpacked. The next step is to copy (using pkg/conversion) into the internal struct. The runtime package's DefaultScheme has conversion functions installed which will unpack the JSON stored in RawExtension, turning it into the correct object type, and storing it in the Object. (TODO: In the case where the object is of an unknown type, a runtime.Unknown object will be created and stored.) | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1TableRowCondition.md b/sdk/python/docs/V1TableRowCondition.md deleted file mode 100644 index 8e0b151840..0000000000 --- a/sdk/python/docs/V1TableRowCondition.md +++ /dev/null @@ -1,14 +0,0 @@ -# V1TableRowCondition - -TableRowCondition allows a row to be marked with additional information. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**message** | **str** | Human readable message indicating details about last transition. | [optional] -**reason** | **str** | (brief) machine readable reason for the condition's last transition. 
| [optional] -**status** | **str** | Status of the condition, one of True, False, Unknown. | [default to ''] -**type** | **str** | Type of row condition. The only defined value is 'Completed' indicating that the object this row represents has reached a completed state and may be given less visual priority than other rows. Clients are not required to honor any conditions but should be consistent where possible about handling the conditions. | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1Timestamp.md b/sdk/python/docs/V1Timestamp.md deleted file mode 100644 index 8ba8ec8acb..0000000000 --- a/sdk/python/docs/V1Timestamp.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1Timestamp - -Timestamp is a struct that is equivalent to Time, but intended for protobuf marshalling/unmarshalling. It is generated into a serialization that matches Time. Do not use in Go structs. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**nanos** | **int** | Non-negative fractions of a second at nanosecond resolution. Negative second values with fractions must still have non-negative nanos values that count forward in time. Must be from 0 to 999,999,999 inclusive. This field may be limited in precision depending on context. | [default to 0] -**seconds** | **int** | Represents seconds of UTC time since Unix epoch 1970-01-01T00:00:00Z. Must be from 0001-01-01T00:00:00Z to 9999-12-31T23:59:59Z inclusive. 
| [default to 0] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1TypeMeta.md b/sdk/python/docs/V1TypeMeta.md deleted file mode 100644 index 590b64243b..0000000000 --- a/sdk/python/docs/V1TypeMeta.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1TypeMeta - -TypeMeta describes an individual object in an API response or request with strings representing the type of the object and its API schema version. Structures that are versioned or persisted should inline TypeMeta. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1UpdateOptions.md b/sdk/python/docs/V1UpdateOptions.md deleted file mode 100644 index 211f8c4dba..0000000000 --- a/sdk/python/docs/V1UpdateOptions.md +++ /dev/null @@ -1,15 +0,0 @@ -# V1UpdateOptions - -UpdateOptions may be provided when updating an API object. All fields in UpdateOptions should also be present in PatchOptions. 
-## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] -**dry_run** | **list[str]** | When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed | [optional] -**field_manager** | **str** | fieldManager is a name associated with the actor or entity that is making these changes. The value must be less than or 128 characters long, and only contain printable characters, as defined by https://golang.org/pkg/unicode/#IsPrint. | [optional] -**field_validation** | **str** | fieldValidation instructs the server on how to handle objects in the request (POST/PUT/PATCH) containing unknown or duplicate fields. Valid values are: - Ignore: This will ignore any unknown fields that are silently dropped from the object, and will ignore all but the last duplicate field that the decoder encounters. This is the default behavior prior to v1.23. - Warn: This will send a warning via the standard warning response header for each unknown field that is dropped from the object, and for each duplicate field that is encountered. The request will still succeed if there are no other errors, and will only persist the last of any duplicate fields. This is the default in v1.23+ - Strict: This will fail the request with a BadRequest error if any unknown fields would be dropped from the object, or if any duplicate fields are present. The error returned from the server will contain all unknown and duplicate fields encountered. 
| [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1WatchEvent.md b/sdk/python/docs/V1WatchEvent.md deleted file mode 100644 index 5eb5887284..0000000000 --- a/sdk/python/docs/V1WatchEvent.md +++ /dev/null @@ -1,12 +0,0 @@ -# V1WatchEvent - -Event represents a single event to a watched resource. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**object** | [**object**](.md) | RawExtension is used to hold extensions in external versions. To use this, make a field which has RawExtension as its type in your external, versioned struct, and Object in your internal struct. You also need to register your various plugin types. // Internal package: type MyAPIObject struct { runtime.TypeMeta `json:\",inline\"` MyPlugin runtime.Object `json:\"myPlugin\"` } type PluginA struct { AOption string `json:\"aOption\"` } // External package: type MyAPIObject struct { runtime.TypeMeta `json:\",inline\"` MyPlugin runtime.RawExtension `json:\"myPlugin\"` } type PluginA struct { AOption string `json:\"aOption\"` } // On the wire, the JSON will look something like this: { \"kind\":\"MyAPIObject\", \"apiVersion\":\"v1\", \"myPlugin\": { \"kind\":\"PluginA\", \"aOption\":\"foo\", }, } So what happens? Decode first uses json or yaml to unmarshal the serialized data into your external MyAPIObject. That causes the raw JSON to be stored, but not unpacked. The next step is to copy (using pkg/conversion) into the internal struct. 
The runtime package's DefaultScheme has conversion functions installed which will unpack the JSON stored in RawExtension, turning it into the correct object type, and storing it in the Object. (TODO: In the case where the object is of an unknown type, a runtime.Unknown object will be created and stored.) | -**type** | **str** | | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/VersionInfo.md b/sdk/python/docs/VersionInfo.md deleted file mode 100644 index 2ae7684577..0000000000 --- a/sdk/python/docs/VersionInfo.md +++ /dev/null @@ -1,19 +0,0 @@ -# VersionInfo - -Info contains versioning information. how we'll want to distribute that information. -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -**build_date** | **str** | | [default to ''] -**compiler** | **str** | | [default to ''] -**git_commit** | **str** | | [default to ''] -**git_tree_state** | **str** | | [default to ''] -**git_version** | **str** | | [default to ''] -**go_version** | **str** | | [default to ''] -**major** | **str** | | [default to ''] -**minor** | **str** | | [default to ''] -**platform** | **str** | | [default to ''] - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/kubeflow/__init__.py b/sdk/python/kubeflow/__init__.py deleted file mode 100644 index 69e3be50da..0000000000 --- a/sdk/python/kubeflow/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/sdk/python/kubeflow/storage_initializer/Dockerfile b/sdk/python/kubeflow/storage_initializer/Dockerfile deleted file mode 100644 index 75bd667c87..0000000000 --- a/sdk/python/kubeflow/storage_initializer/Dockerfile 
+++ /dev/null @@ -1,17 +0,0 @@ -# Use an official Python runtime as a parent image -FROM python:3.11 - -# Set the working directory in the container -WORKDIR /app - -# Copy the requirements.txt file into the container -COPY requirements.txt /app/requirements.txt - -# Install any needed packages specified in requirements.txt -RUN pip install --no-cache-dir -r requirements.txt - -# Copy the Python package and its source code into the container -COPY . /app/storage_initializer - -# Run storage.py when the container launches -ENTRYPOINT ["python", "-m", "storage_initializer.storage"] diff --git a/sdk/python/kubeflow/storage_initializer/__init__.py b/sdk/python/kubeflow/storage_initializer/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sdk/python/kubeflow/storage_initializer/abstract_dataset_provider.py b/sdk/python/kubeflow/storage_initializer/abstract_dataset_provider.py deleted file mode 100644 index 3f75faf0a2..0000000000 --- a/sdk/python/kubeflow/storage_initializer/abstract_dataset_provider.py +++ /dev/null @@ -1,11 +0,0 @@ -from abc import ABC, abstractmethod - - -class datasetProvider(ABC): - @abstractmethod - def load_config(self): - pass - - @abstractmethod - def download_dataset(self): - pass diff --git a/sdk/python/kubeflow/storage_initializer/abstract_model_provider.py b/sdk/python/kubeflow/storage_initializer/abstract_model_provider.py deleted file mode 100644 index 392478a346..0000000000 --- a/sdk/python/kubeflow/storage_initializer/abstract_model_provider.py +++ /dev/null @@ -1,11 +0,0 @@ -from abc import ABC, abstractmethod - - -class modelProvider(ABC): - @abstractmethod - def load_config(self): - pass - - @abstractmethod - def download_model_and_tokenizer(self): - pass diff --git a/sdk/python/kubeflow/storage_initializer/constants.py b/sdk/python/kubeflow/storage_initializer/constants.py deleted file mode 100644 index 19b36523de..0000000000 --- a/sdk/python/kubeflow/storage_initializer/constants.py +++ /dev/null @@ -1,3 
+0,0 @@ -INIT_CONTAINER_MOUNT_PATH = "/workspace" -VOLUME_PATH_DATASET = INIT_CONTAINER_MOUNT_PATH + "/dataset" -VOLUME_PATH_MODEL = INIT_CONTAINER_MOUNT_PATH + "/model" diff --git a/sdk/python/kubeflow/storage_initializer/hugging_face.py b/sdk/python/kubeflow/storage_initializer/hugging_face.py deleted file mode 100644 index bb6eb6a1c0..0000000000 --- a/sdk/python/kubeflow/storage_initializer/hugging_face.py +++ /dev/null @@ -1,107 +0,0 @@ -import json -import logging -from dataclasses import dataclass, field -from typing import Optional, Union -from urllib.parse import urlparse - -import transformers -from peft import LoraConfig - -from .abstract_dataset_provider import datasetProvider -from .abstract_model_provider import modelProvider -from .constants import VOLUME_PATH_DATASET, VOLUME_PATH_MODEL - -TRANSFORMER_TYPES = Union[ - transformers.AutoModelForSequenceClassification, - transformers.AutoModelForTokenClassification, - transformers.AutoModelForQuestionAnswering, - transformers.AutoModelForCausalLM, - transformers.AutoModelForMaskedLM, - transformers.AutoModelForImageClassification, -] - - -# Configure logger. 
-log_formatter = logging.Formatter( - "%(asctime)s %(levelname)-8s %(message)s", "%Y-%m-%dT%H:%M:%SZ" -) -logger = logging.getLogger(__file__) -console_handler = logging.StreamHandler() -console_handler.setFormatter(log_formatter) -logger.addHandler(console_handler) -logger.setLevel(logging.INFO) - - -@dataclass -class HuggingFaceModelParams: - model_uri: str - transformer_type: TRANSFORMER_TYPES - access_token: str = None - num_labels: Optional[int] = None - - def __post_init__(self): - # Custom checks or validations can be added here - if self.model_uri == "" or self.model_uri is None: - raise ValueError("model_uri cannot be empty.") - - -@dataclass -class HuggingFaceTrainerParams: - training_parameters: transformers.TrainingArguments = field( - default_factory=transformers.TrainingArguments - ) - lora_config: LoraConfig = field(default_factory=LoraConfig) - - -class HuggingFace(modelProvider): - def load_config(self, serialised_args): - # implementation for loading the config - self.config = HuggingFaceModelParams(**json.loads(serialised_args)) - - def download_model_and_tokenizer(self): - # implementation for downloading the model - logger.info("Downloading model") - logger.info("-" * 40) - transformer_type_class = getattr(transformers, self.config.transformer_type) - parsed_uri = urlparse(self.config.model_uri) - self.model = parsed_uri.netloc + parsed_uri.path - transformer_type_class.from_pretrained( - self.model, - token=self.config.access_token, - cache_dir=VOLUME_PATH_MODEL, - trust_remote_code=True, - ) - transformers.AutoTokenizer.from_pretrained( - self.model, cache_dir=VOLUME_PATH_MODEL - ) - - -@dataclass -class HuggingFaceDatasetParams: - repo_id: str - access_token: Optional[str] = None - # TODO (andreyvelich): Discuss where we should specify dataset preprocess parameters. 
- split: Optional[str] = None - - def __post_init__(self): - # Custom checks or validations can be added here - if self.repo_id == "" or self.repo_id is None: - raise ValueError("repo_id is None") - - -class HuggingFaceDataset(datasetProvider): - def load_config(self, serialised_args): - self.config = HuggingFaceDatasetParams(**json.loads(serialised_args)) - - def download_dataset(self): - logger.info("Downloading dataset") - logger.info("-" * 40) - import huggingface_hub - from datasets import load_dataset - - if self.config.access_token: - huggingface_hub.login(self.config.access_token) - - # Load dataset and save to disk. - dataset = load_dataset(self.config.repo_id, split=self.config.split) - dataset.save_to_disk(VOLUME_PATH_DATASET) diff --git a/sdk/python/kubeflow/storage_initializer/requirements.txt b/sdk/python/kubeflow/storage_initializer/requirements.txt deleted file mode 100644 index 4aa157c00c..0000000000 --- a/sdk/python/kubeflow/storage_initializer/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -peft==0.3.0 -datasets==2.21.0 -transformers==4.38.0 -boto3==1.33.9 diff --git a/sdk/python/kubeflow/storage_initializer/s3.py b/sdk/python/kubeflow/storage_initializer/s3.py deleted file mode 100644 index 4015eb568d..0000000000 --- a/sdk/python/kubeflow/storage_initializer/s3.py +++ /dev/null @@ -1,73 +0,0 @@ -import json -import os -from dataclasses import dataclass -from urllib.parse import urlparse - -from .abstract_dataset_provider import datasetProvider -from .constants import VOLUME_PATH_DATASET - - -@dataclass -class S3DatasetParams: - endpoint_url: str - bucket_name: str - file_key: str - region_name: str = None - access_key: str = None - secret_key: str = None - - def is_valid_url(self, url): - try: - parsed_url = urlparse(url) - print(parsed_url) - return all([parsed_url.scheme, parsed_url.netloc]) - except ValueError: - return False - - def __post_init__(self): - # Custom checks or validations can be added here - if ( - self.bucket_name is None - or 
self.endpoint_url is None - or self.file_key is None - ): - raise ValueError("bucket_name or endpoint_url or file_key is None") - self.is_valid_url(self.endpoint_url) - - -class S3(datasetProvider): - def load_config(self, serialised_args): - self.config = S3DatasetParams(**json.loads(serialised_args)) - - def download_dataset(self): - import boto3 - - # Create an S3 client for Nutanix Object Store/S3 - s3_client = boto3.Session( - aws_access_key_id=self.config.access_key, - aws_secret_access_key=self.config.secret_key, - region_name=self.config.region_name, - ) - s3_resource = s3_client.resource("s3", endpoint_url=self.config.endpoint_url) - # Get the bucket object - bucket = s3_resource.Bucket(self.config.bucket_name) - - # Filter objects with the specified prefix - objects = bucket.objects.filter(Prefix=self.config.file_key) - # Iterate over filtered objects - for obj in objects: - # Extract the object key (filename) - obj_key = obj.key - path_components = obj_key.split(os.path.sep) - path_excluded_first_last_parts = os.path.sep.join(path_components[1:-1]) - - # Create directories if they don't exist - os.makedirs( - os.path.join(VOLUME_PATH_DATASET, path_excluded_first_last_parts), - exist_ok=True, - ) - - # Download the file - file_path = os.path.sep.join(path_components[1:]) - bucket.download_file(obj_key, os.path.join(VOLUME_PATH_DATASET, file_path)) - print("Files downloaded") diff --git a/sdk/python/kubeflow/storage_initializer/storage.py b/sdk/python/kubeflow/storage_initializer/storage.py deleted file mode 100644 index b753379522..0000000000 --- a/sdk/python/kubeflow/storage_initializer/storage.py +++ /dev/null @@ -1,51 +0,0 @@ -import argparse - -from .hugging_face import HuggingFace, HuggingFaceDataset -from .s3 import S3 - - -def model_factory(model_provider, model_provider_parameters): - match model_provider: - case "hf": - hf = HuggingFace() - hf.load_config(model_provider_parameters) - hf.download_model_and_tokenizer() - case _: - return "This is 
the default case" - - -def dataset_factory(dataset_provider, dataset_provider_parameters): - match dataset_provider: - case "s3": - s3 = S3() - s3.load_config(dataset_provider_parameters) - s3.download_dataset() - case "hf": - hf = HuggingFaceDataset() - hf.load_config(dataset_provider_parameters) - hf.download_dataset() - case _: - return "This is the default case" - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="script for downloading model and datasets to PVC." - ) - parser.add_argument("--model_provider", type=str, help="name of model provider") - parser.add_argument( - "--model_provider_parameters", - type=str, - help="model provider serialised arguments", - ) - - parser.add_argument("--dataset_provider", type=str, help="name of dataset provider") - parser.add_argument( - "--dataset_provider_parameters", - type=str, - help="dataset provider serialized arguments", - ) - args = parser.parse_args() - - model_factory(args.model_provider, args.model_provider_parameters) - dataset_factory(args.dataset_provider, args.dataset_provider_parameters) diff --git a/sdk/python/kubeflow/trainer/Dockerfile b/sdk/python/kubeflow/trainer/Dockerfile deleted file mode 100644 index 6b98e3de31..0000000000 --- a/sdk/python/kubeflow/trainer/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -# Use an official Pytorch runtime as a parent image -FROM nvcr.io/nvidia/pytorch:24.06-py3 - -# Set the working directory in the container -WORKDIR /app - -# Copy the requirements.txt file into the container -COPY requirements.txt /app/requirements.txt - -# Install any needed packages specified in requirements.txt -RUN pip install --no-cache-dir -r requirements.txt - -# Copy the Python package and its source code into the container -COPY . 
/app - -# Run storage.py when the container launches -ENTRYPOINT ["torchrun", "hf_llm_training.py"] diff --git a/sdk/python/kubeflow/trainer/Dockerfile.cpu b/sdk/python/kubeflow/trainer/Dockerfile.cpu deleted file mode 100644 index 20ae09ceb2..0000000000 --- a/sdk/python/kubeflow/trainer/Dockerfile.cpu +++ /dev/null @@ -1,17 +0,0 @@ -# Use an official Python runtime as a parent image -FROM python:3.11 - -# Set the working directory in the container -WORKDIR /app - -# Copy the requirements.txt file into the container -COPY requirements.txt /app/requirements.txt - -# Install any needed packages specified in requirements.txt -RUN pip install --no-cache-dir -r requirements.txt - -# Copy the Python package and its source code into the container -COPY . /app - -# Run storage.py when the container launches -ENTRYPOINT ["torchrun", "hf_llm_training.py"] diff --git a/sdk/python/kubeflow/trainer/hf_llm_training.py b/sdk/python/kubeflow/trainer/hf_llm_training.py deleted file mode 100644 index a79445bae2..0000000000 --- a/sdk/python/kubeflow/trainer/hf_llm_training.py +++ /dev/null @@ -1,205 +0,0 @@ -import argparse -import json -import logging -import os -from urllib.parse import urlparse - -import transformers -from datasets import Dataset, load_from_disk -from datasets.distributed import split_dataset_by_node -from peft import LoraConfig, get_peft_model -from transformers import ( - AutoModelForCausalLM, - AutoModelForImageClassification, - AutoTokenizer, - DataCollatorForLanguageModeling, - Trainer, - TrainingArguments, -) - -# Configure logger. 
-log_formatter = logging.Formatter( - "%(asctime)s %(levelname)-8s %(message)s", "%Y-%m-%dT%H:%M:%SZ" -) -logger = logging.getLogger(__file__) -console_handler = logging.StreamHandler() -console_handler.setFormatter(log_formatter) -logger.addHandler(console_handler) -logger.setLevel(logging.INFO) - - -def setup_model_and_tokenizer(model_uri, transformer_type, model_dir, num_labels): - # Set up the model and tokenizer - parsed_uri = urlparse(model_uri) - model_name = parsed_uri.netloc + parsed_uri.path - - if num_labels != "None": - model = transformer_type.from_pretrained( - pretrained_model_name_or_path=model_name, - cache_dir=model_dir, - local_files_only=True, - trust_remote_code=True, - num_labels=int(num_labels), - ) - else: - model = transformer_type.from_pretrained( - pretrained_model_name_or_path=model_name, - cache_dir=model_dir, - local_files_only=True, - trust_remote_code=True, - ) - - tokenizer = AutoTokenizer.from_pretrained( - pretrained_model_name_or_path=model_name, - cache_dir=model_dir, - local_files_only=True, - ) - - # Freeze model parameters - for param in model.parameters(): - param.requires_grad = False - - return model, tokenizer - - -def load_and_preprocess_data(dataset_dir, transformer_type, tokenizer): - # Load and preprocess the dataset - logger.info("Load and preprocess dataset") - - if transformer_type != AutoModelForImageClassification: - dataset = load_from_disk(dataset_dir) - - logger.info(f"Dataset specification: {dataset}") - logger.info("-" * 40) - - logger.info("Tokenize dataset") - # TODO (andreyvelich): Discuss how user should set the tokenizer function. - dataset = dataset.map( - lambda x: tokenizer(x["text"], padding="max_length", truncation=True), - batched=True, - ) - else: - dataset = load_from_disk(dataset_dir) - - # Check if dataset contains `train` key. Otherwise, load full dataset to train_data. 
- if "train" in dataset: - train_data = dataset["train"] - else: - train_data = dataset - - try: - eval_data = dataset["eval"] - except Exception: - eval_data = None - logger.info("Evaluation dataset is not found") - - # Distribute dataset across PyTorchJob workers. - RANK = int(os.environ["RANK"]) - WORLD_SIZE = int(os.environ["WORLD_SIZE"]) - logger.info( - f"Distributed dataset across PyTorchJob workers. WORLD_SIZE: {WORLD_SIZE}, RANK: {RANK}" - ) - if isinstance(train_data, Dataset): - train_data = split_dataset_by_node( - train_data, - rank=RANK, - world_size=WORLD_SIZE, - ) - if isinstance(eval_data, Dataset): - eval_data = split_dataset_by_node( - eval_data, - rank=RANK, - world_size=WORLD_SIZE, - ) - - return train_data, eval_data - - -def setup_peft_model(model, lora_config): - # Set up the PEFT model - lora_config = LoraConfig(**json.loads(lora_config)) - reference_lora_config = LoraConfig() - for key, val in lora_config.__dict__.items(): - old_attr = getattr(reference_lora_config, key, None) - if old_attr is not None: - val = type(old_attr)(val) - setattr(lora_config, key, val) - - model.enable_input_require_grads() - model = get_peft_model(model, lora_config) - return model - - -def train_model(model, transformer_type, train_data, eval_data, tokenizer, train_args): - # Setup the Trainer. - trainer = Trainer( - model=model, - train_dataset=train_data, - eval_dataset=eval_data, - args=train_args, - ) - - # TODO (andreyvelich): Currently, data collator is supported only for casual LM Transformer. - if transformer_type == AutoModelForCausalLM: - logger.info("Add data collector for language modeling") - logger.info("-" * 40) - trainer.data_collator = DataCollatorForLanguageModeling( - tokenizer, - pad_to_multiple_of=8, - mlm=False, - ) - - # Train the model. - trainer.train() - - -def parse_arguments(): - parser = argparse.ArgumentParser( - description="Script for training a model with PEFT configuration." 
- ) - - parser.add_argument("--model_uri", help="model uri") - parser.add_argument("--transformer_type", help="model transformer type") - parser.add_argument("--num_labels", default="None", help="number of classes") - parser.add_argument("--model_dir", help="directory containing model") - parser.add_argument("--dataset_dir", help="directory containing dataset") - parser.add_argument("--lora_config", help="lora_config") - parser.add_argument( - "--training_parameters", help="hugging face training parameters" - ) - - return parser.parse_args() - - -if __name__ == "__main__": - logger.info("Starting HuggingFace LLM Trainer") - args = parse_arguments() - train_args = TrainingArguments(**json.loads(args.training_parameters)) - reference_train_args = transformers.TrainingArguments( - output_dir=train_args.output_dir - ) - for key, val in train_args.to_dict().items(): - old_attr = getattr(reference_train_args, key, None) - if old_attr is not None: - val = type(old_attr)(val) - setattr(train_args, key, val) - - transformer_type = getattr(transformers, args.transformer_type) - - logger.info("Setup model and tokenizer") - model, tokenizer = setup_model_and_tokenizer( - args.model_uri, transformer_type, args.model_dir, args.num_labels - ) - - logger.info("Preprocess dataset") - train_data, eval_data = load_and_preprocess_data( - args.dataset_dir, transformer_type, tokenizer - ) - - logger.info("Setup LoRA config for model") - model = setup_peft_model(model, args.lora_config) - - logger.info("Start model training") - train_model(model, transformer_type, train_data, eval_data, tokenizer, train_args) - - logger.info("Training is complete") diff --git a/sdk/python/kubeflow/trainer/requirements.txt b/sdk/python/kubeflow/trainer/requirements.txt deleted file mode 100644 index f820ccddc8..0000000000 --- a/sdk/python/kubeflow/trainer/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -peft==0.3.0 -datasets==2.21.0 -transformers==4.38.0 -accelerate==0.28.0 diff --git 
a/sdk/python/kubeflow/training/__init__.py b/sdk/python/kubeflow/training/__init__.py deleted file mode 100644 index 004f5cb009..0000000000 --- a/sdk/python/kubeflow/training/__init__.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 - -# flake8: noqa - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -from __future__ import absolute_import - -__version__ = "1.7.0" - -# import apis into sdk package - -# import ApiClient -from kubeflow.training.api_client import ApiClient -from kubeflow.training.configuration import Configuration -from kubeflow.training.exceptions import OpenApiException -from kubeflow.training.exceptions import ApiTypeError -from kubeflow.training.exceptions import ApiValueError -from kubeflow.training.exceptions import ApiKeyError -from kubeflow.training.exceptions import ApiException -# import models into sdk package -from kubeflow.training.models.kubeflow_org_v1_elastic_policy import KubeflowOrgV1ElasticPolicy -from kubeflow.training.models.kubeflow_org_v1_jax_job import KubeflowOrgV1JAXJob -from kubeflow.training.models.kubeflow_org_v1_jax_job_list import KubeflowOrgV1JAXJobList -from kubeflow.training.models.kubeflow_org_v1_jax_job_spec import KubeflowOrgV1JAXJobSpec -from kubeflow.training.models.kubeflow_org_v1_job_condition import KubeflowOrgV1JobCondition -from kubeflow.training.models.kubeflow_org_v1_job_status import KubeflowOrgV1JobStatus -from kubeflow.training.models.kubeflow_org_v1_mpi_job import KubeflowOrgV1MPIJob -from kubeflow.training.models.kubeflow_org_v1_mpi_job_list import KubeflowOrgV1MPIJobList -from kubeflow.training.models.kubeflow_org_v1_mpi_job_spec import KubeflowOrgV1MPIJobSpec -from kubeflow.training.models.kubeflow_org_v1_paddle_elastic_policy import KubeflowOrgV1PaddleElasticPolicy -from kubeflow.training.models.kubeflow_org_v1_paddle_job import KubeflowOrgV1PaddleJob -from 
kubeflow.training.models.kubeflow_org_v1_paddle_job_list import KubeflowOrgV1PaddleJobList -from kubeflow.training.models.kubeflow_org_v1_paddle_job_spec import KubeflowOrgV1PaddleJobSpec -from kubeflow.training.models.kubeflow_org_v1_py_torch_job import KubeflowOrgV1PyTorchJob -from kubeflow.training.models.kubeflow_org_v1_py_torch_job_list import KubeflowOrgV1PyTorchJobList -from kubeflow.training.models.kubeflow_org_v1_py_torch_job_spec import KubeflowOrgV1PyTorchJobSpec -from kubeflow.training.models.kubeflow_org_v1_rdzv_conf import KubeflowOrgV1RDZVConf -from kubeflow.training.models.kubeflow_org_v1_replica_spec import KubeflowOrgV1ReplicaSpec -from kubeflow.training.models.kubeflow_org_v1_replica_status import KubeflowOrgV1ReplicaStatus -from kubeflow.training.models.kubeflow_org_v1_run_policy import KubeflowOrgV1RunPolicy -from kubeflow.training.models.kubeflow_org_v1_scheduling_policy import KubeflowOrgV1SchedulingPolicy -from kubeflow.training.models.kubeflow_org_v1_tf_job import KubeflowOrgV1TFJob -from kubeflow.training.models.kubeflow_org_v1_tf_job_list import KubeflowOrgV1TFJobList -from kubeflow.training.models.kubeflow_org_v1_tf_job_spec import KubeflowOrgV1TFJobSpec -from kubeflow.training.models.kubeflow_org_v1_xg_boost_job import KubeflowOrgV1XGBoostJob -from kubeflow.training.models.kubeflow_org_v1_xg_boost_job_list import KubeflowOrgV1XGBoostJobList -from kubeflow.training.models.kubeflow_org_v1_xg_boost_job_spec import KubeflowOrgV1XGBoostJobSpec - -from kubeflow.training.api.training_client import TrainingClient -from kubeflow.training.constants import constants diff --git a/sdk/python/kubeflow/training/api/__init__.py b/sdk/python/kubeflow/training/api/__init__.py deleted file mode 100644 index 36dce7fe22..0000000000 --- a/sdk/python/kubeflow/training/api/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import absolute_import - -# flake8: noqa - -# import apis into api package diff --git 
a/sdk/python/kubeflow/training/api/training_client.py b/sdk/python/kubeflow/training/api/training_client.py deleted file mode 100644 index 901a9e9028..0000000000 --- a/sdk/python/kubeflow/training/api/training_client.py +++ /dev/null @@ -1,1418 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import logging -import multiprocessing -import queue -import time -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union - -from kubeflow.storage_initializer.constants import ( - VOLUME_PATH_DATASET, - VOLUME_PATH_MODEL, -) -from kubeflow.training import models -from kubeflow.training.api_client import ApiClient -from kubeflow.training.constants import constants -from kubeflow.training.utils import utils -from kubernetes import client, config, watch - -logger = logging.getLogger(__name__) - -status_logger = utils.StatusLogger( - header="{:<30.30} {:<20.20} {}".format("NAME", "STATE", "TIME"), - column_format="{:<30.30} {:<20.20} {}", -) - - -class TrainingClient(object): - def __init__( - self, - config_file: Optional[str] = None, - context: Optional[str] = None, - client_configuration: Optional[client.Configuration] = None, - namespace: str = utils.get_default_target_namespace(), - job_kind: str = constants.PYTORCHJOB_KIND, - ): - """TrainingClient constructor. Configure logging in your application - as follows to see detailed information from the TrainingClient APIs: - .. 
code-block:: python - import logging - logging.basicConfig() - log = logging.getLogger("kubeflow.training.api.training_client") - log.setLevel(logging.DEBUG) - - Args: - config_file: Path to the kube-config file. Defaults to ~/.kube/config. - context: Set the active context. Defaults to current_context from the kube-config. - client_configuration: Client configuration for cluster authentication. - You have to provide valid configuration with Bearer token or - with username and password. You can find an example here: - https://github.com/kubernetes-client/python/blob/67f9c7a97081b4526470cad53576bc3b71fa6fcc/examples/remote_cluster.py#L31 - namespace: Target Kubernetes namespace. By default it takes namespace - from `/var/run/secrets/kubernetes.io/serviceaccount/namespace` location - or set as `default`. Namespace can be overridden during method invocations. - job_kind: Target Training Job kind (e.g. `TFJob`, `PyTorchJob`, `MPIJob`). - Job kind can be overridden during method invocations. - The default Job kind is `PyTorchJob`. - - Raises: - ValueError: Job kind is invalid. - """ - - # If client configuration is not set, use kube-config to access Kubernetes APIs. - if client_configuration is None: - # Load kube-config or in-cluster config. 
- if config_file or not utils.is_running_in_k8s(): - config.load_kube_config(config_file=config_file, context=context) - else: - config.load_incluster_config() - - k8s_client = client.ApiClient(client_configuration) - self.custom_api = client.CustomObjectsApi(k8s_client) - self.core_api = client.CoreV1Api(k8s_client) - self.api_client = ApiClient() - - self.namespace = namespace - if job_kind not in constants.JOB_PARAMETERS: - raise ValueError( - f"Job kind must be one of these: {list(constants.JOB_PARAMETERS.keys())}" - ) - self.job_kind = job_kind - - def train( - self, - name: str, - namespace: Optional[str] = None, - num_workers: int = 1, - num_procs_per_worker: int = 1, - resources_per_worker: Union[dict, client.V1ResourceRequirements, None] = None, - model_provider_parameters=None, - dataset_provider_parameters=None, - trainer_parameters=None, - init_env_vars: Optional[ - Union[Dict[str, str], List[Union[models.V1EnvVar, models.V1EnvVar]]] - ] = None, - env_vars: Optional[ - Union[Dict[str, str], List[Union[models.V1EnvVar, models.V1EnvVar]]] - ] = None, - storage_config: Dict[str, Optional[Union[str, List[str]]]] = { - "size": constants.PVC_DEFAULT_SIZE, - "storage_class": None, - "access_modes": constants.PVC_DEFAULT_ACCESS_MODES, - }, - ): - """High level API to fine-tune LLMs with distributed PyTorchJob. Follow this guide - for more information about this feature: TODO (andreyvelich): Add link. - - It uses the pre-created Storage Initializer to download pre-trained model and dataset, and - Trainer to fine-tune LLM. Your cluster should support PVC with ReadOnlyMany access mode - to distribute data across PyTorchJob workers. - - It uses `torchrun` CLI to fine-tune model in distributed mode with multiple PyTorchJob - workers. Follow this guide to know more about `torchrun` CLI: - https://pytorch.org/docs/stable/elastic/run.html - - This feature is in alpha stage and Kubeflow community is looking for your feedback. 
- Please use #kubeflow-training Slack channel or Kubeflow Training Operator GitHub - for your questions or suggestions. - - Args: - name: Name of the PyTorchJob. - namespace: Namespace for the PyTorchJob. By default namespace is taken from - `TrainingClient` object. - num_workers: Number of PyTorchJob workers. - num_procs_per_worker: Number of processes per PyTorchJob worker for `torchrun` CLI. You - should use this parameter if you want to use more than 1 GPU per PyTorchJob worker. - resources_per_worker: A parameter that lets you specify how much - resources each PyTorchJob worker container should have. You can either specify a - kubernetes.client.V1ResourceRequirements object (documented here: - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1ResourceRequirements.md) - or a dictionary that includes one or more of the following keys: - `cpu`, `memory`, or `gpu` (other keys will be ignored). Appropriate - values for these keys are documented here: - https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/. - For example: - ``` - { - "cpu": "1", - "memory": "2Gi", - "gpu": "1", - } - ``` - Please note, `gpu` specifies a resource request with a key of - `nvidia.com/gpu`, i.e. an NVIDIA GPU. If you need a different type - of GPU, pass in a V1ResourceRequirement instance instead, since it's - more flexible. This parameter is optional and defaults to None. - model_provider_parameters: Parameters for the model provider in the Storage Initializer. - For example, HuggingFace model name and Transformer type for that model, like: - AutoModelForSequenceClassification. This argument must be the type of - `kubeflow.storage_initializer.hugging_face.HuggingFaceModelParams` - dataset_provider_parameters: Parameters for the dataset provider in the - Storage Initializer. For example, name of the HuggingFace dataset or - AWS S3 configuration. 
This argument must be the type of - `kubeflow.storage_initializer.hugging_face.HuggingFaceDatasetParams` or - `kubeflow.storage_initializer.s3.S3DatasetParams` - trainer_parameters: Parameters for LLM Trainer that will fine-tune pre-trained model - with the given dataset. For example, LoRA config for parameter-efficient fine-tuning - and HuggingFace training arguments like optimizer or number of training epochs. - This argument must be the type of - `kubeflow.storage_initializer.HuggingFaceTrainerParams` - init_env_vars: Environment variable(s) to be attached to init container. - You can specify a dictionary as a mapping object representing the environment - variables. Otherwise, you can specify a list, in which the element can either - be a kubernetes.client.models.V1EnvVar (documented here: - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1EnvVar.md) - or a kubernetes.client.models.V1EnvFromSource (documented here: - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1EnvFromSource.md) - env_vars: Environment variable(s) to be attached to training container. - You can specify a dictionary as a mapping object representing the environment - variables. Otherwise, you can specify a list, in which the element can either - be a kubernetes.client.models.V1EnvVar (documented here: - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1EnvVar.md) - or a kubernetes.client.models.V1EnvFromSource (documented here: - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1EnvFromSource.md) - storage_config: Configuration for Storage Initializer PVC to download pre-trained model - and dataset. You can configure PVC size and storage class name in this argument. - """ - try: - import peft # noqa: F401 - import transformers # noqa: F401 - except ImportError: - raise ImportError( - "Train API dependencies not installed. 
" - + "Run: pip install -U 'kubeflow-training[huggingface]' " - ) - - # fmt: off - - from kubeflow.storage_initializer.hugging_face import ( - HuggingFaceDatasetParams, - HuggingFaceModelParams, - ) - from kubeflow.storage_initializer.s3 import S3DatasetParams - - # fmt: on - - print( - "Thank you for using `train` API for LLMs fine-tuning. This feature is in alpha stage " - "Kubeflow community is looking for your feedback. Please share your experience " - "via #kubeflow-training Slack channel or Kubeflow Training Operator GitHub." - ) - - if ( - not name - or not model_provider_parameters - or not dataset_provider_parameters - or not trainer_parameters - ): - raise ValueError("One of the required parameters is None") - - namespace = namespace or self.namespace - - # TODO (andreyvelich): PVC Creation should be part of Training Operator Controller. - # Ref issue: https://github.com/kubeflow/training-operator/issues/1971 - try: - self.core_api.create_namespaced_persistent_volume_claim( - namespace=namespace, - body=utils.get_pvc_spec( - pvc_name=name, - namespace=namespace, - storage_config=storage_config, - ), - ) - except Exception as e: - pvc_list = self.core_api.list_namespaced_persistent_volume_claim(namespace) - # Check if the PVC with the specified name exists - for pvc in pvc_list.items: - if pvc.metadata.name == name: - print(f"PVC '{name}' already exists in namespace " f"{namespace}.") - break - else: - raise RuntimeError(f"failed to create PVC. 
Error: {e}") - - if isinstance(model_provider_parameters, HuggingFaceModelParams): - mp = "hf" - else: - raise ValueError( - f"Invalid model provider parameters {model_provider_parameters}" - ) - - if isinstance(dataset_provider_parameters, S3DatasetParams): - dp = "s3" - elif isinstance(dataset_provider_parameters, HuggingFaceDatasetParams): - dp = "hf" - else: - raise ValueError( - f"Invalid dataset provider parameters {dataset_provider_parameters}" - ) - - # create init container spec - init_container_spec = utils.get_container_spec( - name=constants.STORAGE_INITIALIZER, - base_image=constants.STORAGE_INITIALIZER_IMAGE, - args=[ - "--model_provider", - mp, - "--model_provider_parameters", - json.dumps(model_provider_parameters.__dict__, cls=utils.SetEncoder), - "--dataset_provider", - dp, - "--dataset_provider_parameters", - json.dumps(dataset_provider_parameters.__dict__), - ], - volume_mounts=[constants.STORAGE_INITIALIZER_VOLUME_MOUNT], - env_vars=init_env_vars, - ) - - # create app container spec - container_spec = utils.get_container_spec( - name=constants.JOB_PARAMETERS[constants.PYTORCHJOB_KIND]["container"], - base_image=constants.TRAINER_TRANSFORMER_IMAGE, - args=[ - "--model_uri", - model_provider_parameters.model_uri, - "--transformer_type", - model_provider_parameters.transformer_type.__name__, - "--num_labels", - str(model_provider_parameters.num_labels), - "--model_dir", - VOLUME_PATH_MODEL, - "--dataset_dir", - VOLUME_PATH_DATASET, - "--lora_config", - json.dumps( - trainer_parameters.lora_config.__dict__, cls=utils.SetEncoder - ), - "--training_parameters", - json.dumps(trainer_parameters.training_parameters.to_dict()), - ], - volume_mounts=[constants.STORAGE_INITIALIZER_VOLUME_MOUNT], - resources=resources_per_worker, - env_vars=env_vars, - ) - - storage_initializer_volume = models.V1Volume( - name=constants.STORAGE_INITIALIZER, - persistent_volume_claim=models.V1PersistentVolumeClaimVolumeSource( - claim_name=name - ), - ) - - # create worker 
pod spec - worker_pod_template_spec = utils.get_pod_template_spec( - containers=[container_spec], - volumes=[storage_initializer_volume], - ) - - # create master pod spec - master_pod_template_spec = utils.get_pod_template_spec( - containers=[container_spec], - init_containers=[init_container_spec], - volumes=[storage_initializer_volume], - ) - - job = utils.get_pytorchjob_template( - name=name, - namespace=namespace, - master_pod_template_spec=master_pod_template_spec, - worker_pod_template_spec=worker_pod_template_spec, - num_workers=num_workers, - num_procs_per_worker=num_procs_per_worker, - ) - - self.create_job(job, namespace=namespace) - - def create_job( - self, - job: Optional[constants.JOB_MODELS_TYPE] = None, - name: Optional[str] = None, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - base_image: Optional[str] = None, - train_func: Optional[Callable] = None, - parameters: Optional[Dict[str, Any]] = None, - num_workers: Optional[int] = 1, - num_procs_per_worker: Optional[Union[int, str]] = None, - resources_per_worker: Union[dict, models.V1ResourceRequirements, None] = None, - num_chief_replicas: Optional[int] = None, - num_ps_replicas: Optional[int] = None, - packages_to_install: Optional[List[str]] = None, - pip_index_url: str = constants.DEFAULT_PIP_INDEX_URL, - env_vars: Optional[ - Union[Dict[str, str], List[Union[models.V1EnvVar, models.V1EnvVar]]] - ] = None, - ): - """Create the Training Job. - Job can be created using one of the following options: - - - Define custom resource object in `job` parameter (e.g. TFJob or PyTorchJob). - - Define training function in `train_func` parameter and number of workers. - - Define Docker image in `base_image` parameter and number of workers. - - Args: - job: Job object. Object must be one of these types: KubeflowOrgV1TFJob, - KubeflowOrgV1PyTorchJob, etc. - name: Name for the Job. It must be set if `job` parameter is omitted. - namespace: Namespace for the Job. 
By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). It must be set if - `job` parameter is omitted. By default Job kind is taken from - `TrainingClient` object. - base_image: Image that Job uses to train the model on each training replica. - If `train_func` parameter is set, this image is used to execute the training - function. The `constants` module contains some base images, the default image - is `docker.io/pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime` - train_func: Function that Job uses to train the model on each training replica. - This function must be Callable. Optionally, this function might have one dict - argument to define input parameters for the function. If `train_func` is - set, Base Image must support `bash` CLI to execute the training script. - parameters: Dict of input parameters that training function might receive. - num_workers: Number of Worker replicas for the Job. - num_procs_per_worker: Number of processes per PyTorchJob worker for `torchrun` CLI. You - should use this parameter if you want to use more than 1 GPU per PyTorchJob worker. - Set to "auto" to automatically use available GPU/CPU PyTorch resources. - resources_per_worker: A parameter that lets you specify how much - resources each Worker container should have. You can either specify a - kubernetes.client.V1ResourceRequirements object (documented here: - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1ResourceRequirements.md) - or a dictionary that includes one or more of the following keys: - `cpu`, `memory`, or `gpu` (other keys will be ignored). Appropriate - values for these keys are documented here: - https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/. - For example: - ``` - { - "cpu": "1", - "memory": "2Gi", - "gpu": "1", - } - ``` - Please note, `gpu` specifies a resource request with a key of - `nvidia.com/gpu`, i.e. an NVIDIA GPU. 
If you need a different type - of GPU, pass in a V1ResourceRequirement instance instead, since it's - more flexible. This parameter is optional and defaults to None. - num_chief_replicas: Number of Chief replicas for the TFJob. Number - of Chief replicas can't be more than 1. - num_ps_replicas: Number of Parameter Server replicas for the TFJob. - packages_to_install: List of Python packages to install in addition - to the base image packages if `train_func` parameter is set. - These packages are installed before executing the objective function. - pip_index_url: The PyPI url from which to install Python packages. - env_vars: Environment variable(s) to be attached to training container. - You can specify a dictionary as a mapping object representing the environment - variables. Otherwise, you can specify a list, in which the element can either - be a kubernetes.client.models.V1EnvVar (documented here: - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1EnvVar.md) - or a kubernetes.client.models.V1EnvFromSource (documented here: - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1EnvFromSource.md) - - Raises: - ValueError: Invalid input parameters. - TimeoutError: Timeout to create Job. - RuntimeError: Failed to create Job. - """ - - # When Job is set, only namespace arg is allowed. - if job is not None: - for key, value in locals().items(): - if ( - key - not in ["self", "job", "namespace", "pip_index_url", "num_workers"] - and value is not None - ): - raise ValueError( - "If `job` is set only `namespace` argument is allowed. " - f"Argument `{key}` must be None." - ) - - namespace = namespace or self.namespace - job_kind = job_kind or self.job_kind - if job is not None: - job_kind = str(job.kind) - - if job_kind not in constants.JOB_PARAMETERS: - raise ValueError( - f"Job kind must be one of these: {constants.JOB_PARAMETERS.keys()}" - ) - - # If Training function or base image is set, configure Job template. 
- if job is None and (train_func is not None or base_image is not None): - # Job name must be set to configure Job template. - if name is None: - raise ValueError( - "Job name must be set to configure Job from function or image" - ) - - # Check if at least one Worker is set. - # TODO (andreyvelich): Remove this check once we have CEL validation. - # Ref: https://github.com/kubeflow/training-operator/issues/1708 - if num_workers is None or num_workers < 1: - raise ValueError(f"At least one Worker for {job_kind} must be set") - - # Assign the default base image. - # TODO (andreyvelich): Add base image for other Job kinds. - if base_image is None: - base_image = constants.JOB_PARAMETERS[job_kind]["base_image"] - - # By default we don't set command and args for the training container. - command, args = None, None - - # If training function is set get the command and args. - if train_func is not None: - # Use `torchrun` for distributed PyTorch training, otherwise use `python` - if job_kind == constants.PYTORCHJOB_KIND and ( - num_workers > 1 or num_procs_per_worker is not None - ): - entrypoint = constants.ENTRYPOINT_TORCH - else: - entrypoint = constants.ENTRYPOINT_PYTHON - - command, args = utils.get_command_using_train_func( - train_func=train_func, - entrypoint=entrypoint, - train_func_parameters=parameters, - packages_to_install=packages_to_install, - pip_index_url=pip_index_url, - ) - - # Get Training Container template. - container_spec = utils.get_container_spec( - name=constants.JOB_PARAMETERS[job_kind]["container"], - base_image=base_image, - command=command, - args=args, - resources=resources_per_worker, - env_vars=env_vars, - ) - - # Get Pod template spec using the above container. - pod_template_spec = utils.get_pod_template_spec( - containers=[container_spec], - ) - - # Configure template for different Jobs. - # TODO (andreyvelich): Add support for other kinds (e.g. MPIJob). 
- if job_kind == constants.TFJOB_KIND: - if num_procs_per_worker is not None: - raise ValueError( - f"num_procs_per_worker can't be set for {constants.TFJOB_KIND}" - ) - job = utils.get_tfjob_template( - name=name, - namespace=namespace, - pod_template_spec=pod_template_spec, - num_workers=num_workers, - num_chief_replicas=num_chief_replicas, - num_ps_replicas=num_ps_replicas, - ) - elif job_kind == constants.PYTORCHJOB_KIND: - if num_chief_replicas is not None or num_ps_replicas is not None: - raise ValueError( - "num_chief_replicas and num_ps_replicas can't be set for " - f"{constants.PYTORCHJOB_KIND}" - ) - job = utils.get_pytorchjob_template( - name=name, - namespace=namespace, - worker_pod_template_spec=pod_template_spec, - num_workers=num_workers, - num_procs_per_worker=num_procs_per_worker, - ) - else: - raise ValueError( - f"Job kind {job_kind} can't be created using function or image. " - + "Number of Workers must be set." - ) - - # Verify Job object type. - if not isinstance( - job, - getattr(models, constants.JOB_PARAMETERS[job_kind]["model"]), - ): - raise ValueError( - f"Job must be one of these types: {constants.JOB_MODELS}, but Job is: {type(job)}" - ) - - # Create the Training Job. - try: - self.custom_api.create_namespaced_custom_object( - constants.GROUP, - constants.VERSION, - namespace, - constants.JOB_PARAMETERS[job_kind]["plural"], - job, - ) - except multiprocessing.TimeoutError: - raise TimeoutError( - f"Timeout to create {job_kind}: {namespace}/{job.metadata.name}" - ) - except Exception: - raise RuntimeError( - f"Failed to create {job_kind}: {namespace}/{job.metadata.name}" - ) - - logger.debug(f"{job_kind} {namespace}/{job.metadata.name} has been created") - - def get_job( - self, - name: str, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> constants.JOB_MODELS_TYPE: - """Get the Training Job. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. 
By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - object: Job object. For example: KubeflowOrgV1PyTorchJob - - Raises: - TimeoutError: Timeout to get Job. - RuntimeError: Failed to get Job. - """ - - namespace = namespace or self.namespace - job_kind = job_kind or self.job_kind - - if job_kind not in constants.JOB_PARAMETERS: - raise ValueError( - f"Job kind must be one of these: {constants.JOB_PARAMETERS.keys()}" - ) - - try: - thread = self.custom_api.get_namespaced_custom_object( - constants.GROUP, - constants.VERSION, - namespace, - constants.JOB_PARAMETERS[job_kind]["plural"], - name, - async_req=True, - ) - response = utils.FakeResponse(thread.get(timeout)) - job = self.api_client.deserialize( - response, constants.JOB_PARAMETERS[job_kind]["model"] - ) - - except multiprocessing.TimeoutError: - raise TimeoutError(f"Timeout to get {job_kind}: {namespace}/{name}") - except Exception: - raise RuntimeError(f"Failed to get {job_kind}: {namespace}/{name}") - - return job - - def list_jobs( - self, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> List[constants.JOB_MODELS_TYPE]: - """List of all Training Jobs with specific kind in namespace. - - Args: - namespace: Namespace to list the Jobs. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - list[object]: List of Job objects. - For example: list of KubeflowOrgV1PyTorchJob objects. It returns empty list - if Jobs can't be found. 
- - Raises: - TimeoutError: Timeout to list Jobs - RuntimeError: Failed to list Jobs - """ - - namespace = namespace or self.namespace - job_kind = job_kind or self.job_kind - - if job_kind not in constants.JOB_PARAMETERS: - raise ValueError( - f"Job kind must be one of these: {constants.JOB_PARAMETERS.keys()}" - ) - - result = [] - try: - thread = self.custom_api.list_namespaced_custom_object( - constants.GROUP, - constants.VERSION, - namespace, - constants.JOB_PARAMETERS[job_kind]["plural"], - async_req=True, - ) - response = thread.get(timeout) - result = [ - self.api_client.deserialize( - utils.FakeResponse(item), - constants.JOB_PARAMETERS[job_kind]["model"], - ) - for item in response.get("items") - ] - except multiprocessing.TimeoutError: - raise TimeoutError(f"Timeout to list {job_kind}s in namespace: {namespace}") - except Exception: - raise RuntimeError(f"Failed to list {job_kind}s in namespace: {namespace}") - - return result - - def get_job_conditions( - self, - name: Optional[str] = None, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - job: Optional[constants.JOB_MODELS_TYPE] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> List[models.V1JobCondition]: - """Get the Training Job conditions. Training Job is in the condition when - `status=True` for the appropriate condition `type`. For example, - Training Job is Succeeded when `status=True` and `type=Succeeded`. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - job: Job object can be set to get the conditions. Object must be one of - these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, etc. - If this parameter is omitted, it gets Job with the given name and kind. - timeout: Kubernetes API server timeout in seconds to execute the request. 
- - Returns: - list[V1JobCondition]: List of Job conditions with - last transition time, last update time, message, reason, type, and - status. It returns empty list if Job does not have any - conditions yet. - - Raises: - ValueError: Invalid input parameters. - TimeoutError: Timeout to get Job. - RuntimeError: Failed to get Job. - """ - - namespace = namespace or self.namespace - job_kind = job_kind or self.job_kind - - if job_kind not in constants.JOB_PARAMETERS: - raise ValueError( - f"Job kind must be one of these: {constants.JOB_PARAMETERS.keys()}" - ) - - if job is not None and not isinstance( - job, getattr(models, constants.JOB_PARAMETERS[job_kind]["model"]) - ): - raise ValueError(f"Job must be one of these types: {constants.JOB_MODELS}") - - # If Job is not set, get the Training Job. - if job is None: - # Job name must be set when Job object is not set. - if name is None: - raise ValueError( - "Job name must be set to configure Job from function or image" - ) - - job = self.get_job( - name=name, - namespace=namespace, - job_kind=job_kind, - timeout=timeout, - ) - if job.status and job.status.conditions and len(job.status.conditions) > 0: - return job.status.conditions - return [] - - def is_job_created( - self, - name: Optional[str] = None, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - job: Optional[constants.JOB_MODELS_TYPE] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> bool: - """Check if Training Job is Created. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - job: Job object can be set to get the conditions. Object must be one of - these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, etc. - If this parameter is omitted, it gets Job with the given name and kind. 
- timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - bool: True if Job is Created, else False. - - Raises: - ValueError: Invalid input parameters. - TimeoutError: Timeout to get Job. - RuntimeError: Failed to get Job. - """ - - return utils.has_condition( - self.get_job_conditions(name, namespace, job_kind, job, timeout), - constants.JOB_CONDITION_CREATED, - ) - - def is_job_running( - self, - name: Optional[str] = None, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - job: Optional[constants.JOB_MODELS_TYPE] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> bool: - """Check if Training Job is Running. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - job: Job object can be set to get the conditions. Object must be one of - these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, etc. - If this parameter is omitted, it gets Job with the given name and kind. - timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - bool: True if Job is Running, else False. - - Raises: - ValueError: Invalid input parameters. - TimeoutError: Timeout to get Job. - RuntimeError: Failed to get Job. - """ - - return utils.has_condition( - self.get_job_conditions(name, namespace, job_kind, job, timeout), - constants.JOB_CONDITION_RUNNING, - ) - - def is_job_restarting( - self, - name: Optional[str] = None, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - job: Optional[constants.JOB_MODELS_TYPE] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> bool: - """Check if Training Job is Restarting. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. 
- job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - job: Job object can be set to get the conditions. Object must be one of - these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, etc. - If this parameter is omitted, it gets Job with the given name and kind. - timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - bool: True if Job is Restarting, else False. - - Raises: - ValueError: Invalid input parameters. - TimeoutError: Timeout to get Job. - RuntimeError: Failed to get Job. - """ - - return utils.has_condition( - self.get_job_conditions(name, namespace, job_kind, job, timeout), - constants.JOB_CONDITION_RESTARTING, - ) - - def is_job_succeeded( - self, - name: Optional[str] = None, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - job: Optional[constants.JOB_MODELS_TYPE] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> bool: - """Check if Training Job is Succeeded. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - job: Job object can be set to get the conditions. Object must be one of - these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, etc. - If this parameter is omitted, it gets Job with the given name and kind. - timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - bool: True if Job is Succeeded, else False. - - Raises: - ValueError: Invalid input parameters. - TimeoutError: Timeout to get Job. - RuntimeError: Failed to get Job. 
- """ - - return utils.has_condition( - self.get_job_conditions(name, namespace, job_kind, job, timeout), - constants.JOB_CONDITION_SUCCEEDED, - ) - - def is_job_failed( - self, - name: Optional[str] = None, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - job: Optional[constants.JOB_MODELS_TYPE] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> bool: - """Check if Training Job is Failed. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - job: Job object can be set to get the conditions. Object must be one of - these types: KubeflowOrgV1TFJob, KubeflowOrgV1PyTorchJob, etc. - If this parameter is omitted, it gets Job with the given name and kind. - timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - bool: True if Job is Failed, else False. - - Raises: - ValueError: Invalid input parameters. - TimeoutError: Timeout to get Job. - RuntimeError: Failed to get Job. - """ - - return utils.has_condition( - self.get_job_conditions(name, namespace, job_kind, job, timeout), - constants.JOB_CONDITION_FAILED, - ) - - def wait_for_job_conditions( - self, - name: str, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - expected_conditions: Set = {constants.JOB_CONDITION_SUCCEEDED}, - wait_timeout: int = 600, - polling_interval: int = 15, - callback: Optional[Callable] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> constants.JOB_MODELS_TYPE: - """Wait until Training Job reaches any of the specified conditions. - By default it waits for the Succeeded condition. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). 
By default Job kind - is taken from `TrainingClient` object. - expected_conditions: Set of expected conditions. It must be subset of this: - `{"Created", "Running", "Restarting", "Succeeded", "Failed"}` - wait_timeout: How many seconds to wait until Job reaches one of - the expected conditions. - polling_interval: The polling interval in seconds to get Job status. - callback: Callback function that is invoked after Job - status is polled. This function takes a single argument which - is current Job object. - timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - object: Job object. For example: KubeflowOrgV1PyTorchJob - - Raises: - ValueError: Invalid input parameters. - TimeoutError: Timeout to get Job. - RuntimeError: Failed to get Job, or Job reaches Failed condition and - Failed is not in `expected_conditions` set. - """ - - namespace = namespace or self.namespace - job_kind = job_kind or self.job_kind - - if not expected_conditions.issubset(constants.JOB_CONDITIONS): - raise ValueError( - f"Expected conditions: {expected_conditions} must be subset of \ - {constants.JOB_CONDITIONS}" - ) - for _ in range(round(wait_timeout / polling_interval)): - # We should get Job only once per cycle and check the statuses. - job = self.get_job( - name=name, - namespace=namespace, - job_kind=job_kind, - timeout=timeout, - ) - - # Get Job conditions. - conditions = self.get_job_conditions( - job=job, timeout=timeout, job_kind=job_kind - ) - if len(conditions) > 0: - status_logger( - name, - conditions[-1].type, - conditions[-1].last_transition_time, - ) - - # Execute callback function is it is set. - if callback: - callback(job) - - # Raise an exception if Job is Failed and Failed is not the expected condition. - if ( - constants.JOB_CONDITION_FAILED not in expected_conditions - and utils.has_condition(conditions, constants.JOB_CONDITION_FAILED) - ): - raise RuntimeError( - f"{job_kind} {namespace}/{name} is Failed. 
" - f"{job_kind} conditions: {job.status.conditions}" - ) - - # Return Job when it reaches expected condition. - for expected_condition in expected_conditions: - if utils.has_condition(conditions, expected_condition): - return job - - time.sleep(polling_interval) - - raise TimeoutError( - f"Timeout waiting for {job_kind}: {namespace}/{name} to reach expected conditions: \ - {expected_conditions}" - ) - - def get_job_pods( - self, - name: str, - namespace: Optional[str] = None, - is_master: bool = False, - replica_type: Optional[str] = None, - replica_index: Optional[int] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> List[models.V1Pod]: - """Get pods for the Training Job. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - is_master: Whether to get pods only with the label - `training.kubeflow.org/job-role: master`. - replica_type: Type of the Job replica. - For TFJob one of `Chief`, `PS`, or `worker`. - - For PyTorchJob one of `master` or `worker`. - - For XGBoostJob one of `master` or `worker`. - - For MPIJob one of `launcher` or `worker`. - - For PaddleJob one of `master` or `worker`. - - For JAXJob `worker`. - - replica_index: Index for the Job replica. - timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - list[V1Pod]: List of the Job pods. - - Raises: - ValueError: Job replica type is invalid. - TimeoutError: Timeout to get Job pods. - RuntimeError: Failed to get Job pods. 
- """ - - namespace = namespace or self.namespace - - if ( - replica_type is not None - and replica_type not in constants.TFJOB_REPLICA_TYPES - and replica_type not in constants.PYTORCHJOB_REPLICA_TYPES - and replica_type not in constants.XGBOOSTJOB_REPLICA_TYPES - and replica_type not in constants.MPIJOB_REPLICA_TYPES - and replica_type not in constants.PADDLEJOB_REPLICA_TYPES - and replica_type not in constants.JAXJOB_REPLICA_TYPES - ): - raise ValueError( - f"TFJob replica type must be one of {constants.TFJOB_REPLICA_TYPES}\n" - f"PyTorchJob replica type must be one of {constants.PYTORCHJOB_REPLICA_TYPES}\n" - f"XGBoostJob replica type must be one of {constants.XGBOOSTJOB_REPLICA_TYPES}\n" - f"MPIJob replica type must be one of {constants.MPIJOB_REPLICA_TYPES}\n" - f"PaddleJob replica type must be one of {constants.PADDLEJOB_REPLICA_TYPES}" - f"JAXJob replica type must be one of {constants.PADDLEJOB_REPLICA_TYPES}" - ) - - label_selector = f"{constants.JOB_NAME_LABEL}={name}" - - # Add Job role label if that is required. - if is_master: - label_selector += f",{constants.JOB_ROLE_LABEL}={constants.JOB_ROLE_MASTER}" - - # Add Replica type label if that is required. - if replica_type: - label_selector += ( - f",{constants.REPLICA_TYPE_LABEL}={str.lower(replica_type)}" - ) - - # Add Replica index label if that is required. - if replica_index is not None: - label_selector += f",{constants.REPLICA_INDEX_LABEL}={replica_index}" - - # Return list of Training Job pods. 
- try: - thread = self.core_api.list_namespaced_pod( - namespace, - label_selector=label_selector, - async_req=True, - ) - return thread.get(timeout).items - except multiprocessing.TimeoutError: - raise TimeoutError(f"Timeout to list pods for Job: {namespace}/{name}") - except Exception: - raise RuntimeError(f"Failed to list pods for Job: {namespace}/{name}") - - def get_job_pod_names( - self, - name: str, - namespace: Optional[str] = None, - is_master: bool = False, - replica_type: Optional[str] = None, - replica_index: Optional[int] = None, - timeout: int = constants.DEFAULT_TIMEOUT, - ) -> List[str]: - """Get pod names for the Training Job. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - is_master: Whether to get pods only with the label - `training.kubeflow.org/job-role: master`. - replica_type: Type of the Job replica. - For TFJob one of `Chief`, `PS`, or `worker`. - - For PyTorchJob one of `master` or `worker`. - - For XGBoostJob one of `master` or `worker`. - - For MPIJob one of `launcher` or `worker`. - - For PaddleJob one of `master` or `worker`. - - For JAXJob `worker`. - - replica_index: Index for the Job replica. - timeout: Kubernetes API server timeout in seconds to execute the request. - - Returns: - list[str]: List of the Job pod names. - - Raises: - ValueError: Job replica type is invalid. - TimeoutError: Timeout to get Job pods. - RuntimeError: Failed to get Job pods. 
- """ - - namespace = namespace or self.namespace - - pods = self.get_job_pods( - name=name, - namespace=namespace, - is_master=is_master, - replica_type=replica_type, - replica_index=replica_index, - timeout=timeout, - ) - pod_names = [] - for pod in pods: - pod_names.append(pod.metadata.name) - return pod_names - - def get_job_logs( - self, - name: str, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - is_master: bool = True, - replica_type: Optional[str] = None, - replica_index: Optional[int] = None, - follow: bool = False, - timeout: int = constants.DEFAULT_TIMEOUT, - verbose: bool = False, - ) -> Tuple[Dict[str, str], Dict[str, List[str]]]: - """Get the logs for every Training Job pod. By default it returns logs from - the `master` pod. Logs are returned in this format: { "pod-name": "Log data" }. - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - is_master: Whether to get logs for the pod with the label - `training.kubeflow.org/job-role: master`. - replica_type: Optional, type of the Job replica. - For TFJob one of `chief`, `ps`, or `worker`. - - For PyTorchJob one of `master` or `worker`. - - For XGBoostJob one of `master` or `worker`. - - For MPIJob one of `launcher` or `worker`. - - For PaddleJob one of `master` or `worker`. - - For JAXJob `worker`. - replica_index: Optional, index for the Job replica. - container: Pod container to get the logs. - follow: Whether to follow the log stream of the pod and print logs to StdOut. - timeout: Optional, Kubernetes API server timeout in seconds - to execute the request. - verbose: Whether to get Kubernetes events for Job and corresponding pods. - If you need to get events from all PyTorchJob's Pods, set `isMaster = False`. 
- - Returns: - Dict[str, str]: A dictionary in which the keys are pod names and the - values are the corresponding logs. - Dict[str, str]: A dictionary in which the keys are object kind and name, and the - values are list of the corresponding Kubernetes events with their timestamps. This - value is returned only if `verbose = True`. For example: - ```json - { - "PyTorchJob train-mnist": [ - "2024-01-05 22:58:20 Created pod: train-mnist-worker-0" - ], - "Pod train-mnist-worker-0": [ - "2024-01-05 22:58:20 Created container init-pytorch" - ] - } - ``` - - Raises: - ValueError: Job replica type is invalid. - TimeoutError: Timeout to get Job or Job's pods - RuntimeError: Failed to get Job or Job's pods. - """ - - namespace = namespace or self.namespace - job_kind = job_kind or self.job_kind - - pods = self.get_job_pods( - name=name, - namespace=namespace, - is_master=is_master, - replica_type=replica_type, - replica_index=replica_index, - timeout=timeout, - ) - - logs_dict = {} - events_dict = {} - if pods and follow: - log_streams = [] - for pod in pods: - if ( - pod.status is not None - and pod.status.phase != constants.POD_PHASE_PENDING - ): - log_streams.append( - watch.Watch().stream( - self.core_api.read_namespaced_pod_log, - name=pod.metadata.name, - namespace=namespace, - container=constants.JOB_PARAMETERS[job_kind]["container"], - ) - ) - finished = [False for _ in log_streams] - - # Create thread and queue per stream, for non-blocking iteration - log_queue_pool = utils.get_log_queue_pool(log_streams) - - # Iterate over every watching pods' log queue - while True: - for index, log_queue in enumerate(log_queue_pool): - if all(finished): - break - if finished[index]: - continue - # grouping the every 50 log lines of the same pod - for _ in range(50): - try: - logline = log_queue.get(timeout=1) - if logline is None: - finished[index] = True - break - - # Print logs to the StdOut - print(f"[Pod {pods[index].metadata.name}]: {logline}") - # Add logs to the results 
dict. - if pods[index].metadata.name not in logs_dict: - logs_dict[pods[index].metadata.name] = logline - else: - logs_dict[pods[index].metadata.name] += logline - except queue.Empty: - break - if all(finished): - break - elif pods: - for pod in pods: - if ( - pod.status is not None - and pod.status.phase != constants.POD_PHASE_PENDING - ): - try: - pod_logs = self.core_api.read_namespaced_pod_log( - name=pod.metadata.name, - namespace=namespace, - container=constants.JOB_PARAMETERS[job_kind]["container"], - ) - logs_dict[pod.metadata.name] = pod_logs - except Exception: - raise RuntimeError( - f"Failed to read logs for pod {namespace}/{pod.metadata.name}" - ) - # If verbose is set, return Kubernetes events for Job and pods. - if verbose: - job = self.get_job(name=name, namespace=namespace) - events = self.core_api.list_namespaced_event(namespace=namespace) - - # Get events for the Job and Job's pods. - for event in events.items: - utils.add_event_to_dict( - events_dict=events_dict, - event=event, - object_kind=job_kind, - object_name=name, - object_creation_timestamp=job.metadata.creation_timestamp, - ) - if pods: - for pod in pods: - utils.add_event_to_dict( - events_dict=events_dict, - event=event, - object_kind=constants.POD_KIND, - object_name=pod.metadata.name, - object_creation_timestamp=pod.metadata.creation_timestamp, - ) - - return logs_dict, events_dict - - def update_job( - self, - job: constants.JOB_MODELS_TYPE, - name: str, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - ): - """Update the Training Job by using patch Kubernetes API. - - Args: - job: Job object. For example, object with type - KubeflowOrgV1TFJob or KubeflowOrgV1PyTorchJob. - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. 
- Raises: - TimeoutError: Timeout to update Job - RuntimeError: Failed to update Job - """ - - namespace = namespace or self.namespace - job_kind = job_kind or self.job_kind - - if job_kind not in constants.JOB_PARAMETERS: - raise ValueError( - f"Job kind must be one of these: {constants.JOB_PARAMETERS.keys()}" - ) - - try: - self.custom_api.patch_namespaced_custom_object( - constants.GROUP, - constants.VERSION, - namespace, - constants.JOB_PARAMETERS[job_kind]["plural"], - name, - job, - ) - except multiprocessing.TimeoutError: - raise TimeoutError(f"Timeout to update {job_kind}: {namespace}/{name}") - except Exception: - raise RuntimeError(f"Failed to update {job_kind}: {namespace}/{name}") - - logger.debug(f"{job_kind} {namespace}/{name} has been updated") - - def delete_job( - self, - name: str, - namespace: Optional[str] = None, - job_kind: Optional[str] = None, - delete_options: Optional[models.V1DeleteOptions] = None, - ): - """Delete the Training Job - - Args: - name: Name for the Job. - namespace: Namespace for the Job. By default namespace is taken from - `TrainingClient` object. - job_kind: Kind for the Job (e.g. `TFJob` or `PyTorchJob`). By default Job kind - is taken from `TrainingClient` object. - delete_options: Optional, V1DeleteOptions to set while deleting - the Job. For example, grace period seconds. - - Raises: - TimeoutError: Timeout to delete Job. - RuntimeError: Failed to delete Job. 
- """ - - namespace = namespace or self.namespace - job_kind = job_kind or self.job_kind - - try: - self.custom_api.delete_namespaced_custom_object( - constants.GROUP, - constants.VERSION, - namespace, - constants.JOB_PARAMETERS[job_kind]["plural"], - name=name, - body=delete_options, - ) - except multiprocessing.TimeoutError: - raise TimeoutError(f"Timeout to delete {job_kind}: {namespace}/{name}") - except Exception: - raise RuntimeError(f"Failed to delete {job_kind}: {namespace}/{name}") - - logger.debug(f"{job_kind} {namespace}/{name} has been deleted") diff --git a/sdk/python/kubeflow/training/api/training_client_test.py b/sdk/python/kubeflow/training/api/training_client_test.py deleted file mode 100644 index bc5366f078..0000000000 --- a/sdk/python/kubeflow/training/api/training_client_test.py +++ /dev/null @@ -1,1777 +0,0 @@ -import multiprocessing -import queue -from datetime import datetime, timedelta -from unittest.mock import Mock, patch - -import pytest -from kubeflow.training import ( - KubeflowOrgV1JobCondition, - KubeflowOrgV1JobStatus, - KubeflowOrgV1PyTorchJob, - KubeflowOrgV1PyTorchJobSpec, - KubeflowOrgV1ReplicaSpec, - KubeflowOrgV1RunPolicy, - TrainingClient, - constants, -) -from kubeflow.training.models import V1DeleteOptions -from kubernetes.client import ( - ApiClient, - V1Container, - V1EnvVar, - V1ObjectMeta, - V1PodSpec, - V1PodTemplateSpec, -) - -TEST_NAME = "test" -TEST_IMAGE = "docker.io/test-training" - -TIMEOUT = "timeout" -RUNTIME = "runtime" -MOCK_POD_OBJ = "mock_pod_obj" -NO_PODS = "no_pods" -DUMMY_POD_NAME = "Dummy V1PodList" -LIST_RESPONSE = [ - {"metadata": {"name": DUMMY_POD_NAME}}, -] -SUCCESS = "success" -FAILED = "Failed" -CREATED = "Created" -RUNNING = "Running" -RESTARTING = "Restarting" -SUCCEEDED = "Succeeded" -INVALID = "invalid" - -FAIL_LOGS = "fail_logs" -FAIL_EVENTS = "fail_events" -MULTI_PODS = "multi_pods" -PENDING_POD = "pending_pod" -NO_STATUS_POD = "no_status_pod" -QUEUE_TIMEOUT = "queue_timeout" -QUEUE_EMPTY = 
"queue_empty" -EVENT_CREATION_TIMESTAMP = datetime(2024, 1, 5, 22, 58, 20) - - -def conditional_error_handler(*args, **kwargs): - if args[2] == TIMEOUT: - raise multiprocessing.TimeoutError() - elif args[2] == RUNTIME: - raise RuntimeError() - - -def serialize_k8s_object(obj): - api_client = ApiClient() - return api_client.sanitize_for_serialization(obj) - - -def get_namespaced_custom_object_response(*args, **kwargs): - if args[2] == TIMEOUT: - raise multiprocessing.TimeoutError() - elif args[2] == RUNTIME: - raise RuntimeError() - - # Create a serialized Job - serialized_job = serialize_k8s_object(generate_job_with_status(create_job())) - - # Mock the thread and set it's return value to the serialized Job - mock_thread = Mock() - mock_thread.get.return_value = serialized_job - - return mock_thread - - -def list_namespaced_custom_object_response(*args, **kwargs): - if args[2] == TIMEOUT: - raise multiprocessing.TimeoutError() - elif args[2] == RUNTIME: - raise RuntimeError() - elif args[2] == "empty-namespace": - mock_response = {"items": []} - elif args[2] == "multi-jobs": - mock_response = { - "items": [ - serialize_k8s_object(generate_job_with_status(create_job())), - serialize_k8s_object(generate_job_with_status(create_job())), - ] - } - else: - mock_response = { - "items": [serialize_k8s_object(generate_job_with_status(create_job()))] - } - - mock_thread = Mock() - mock_thread.get.return_value = mock_response - return mock_thread - - -def list_namespaced_pod_response(*args, **kwargs): - class MockResponse: - def get(self, timeout): - """ - Simulates Kubernetes API response for listing namespaced pods, - and pass timeout for verification - - :return: - - If `args[0] == "timeout"`, raises `TimeoutError`. - - If `args[0] == "runtime"`, raises `Exception`. - - If `args[0] == "mock_pod_obj"`, returns a mock pod object - with `metadata.name = "Dummy V1PodList"`. - - If `args[0] == "no_pods"`, returns an empty list of pods. 
- - Otherwise, returns a default list of dicts representing pods, - with `timeout` included, for testing. - """ - LIST_RESPONSE[0][TIMEOUT] = timeout - if args[0] == TIMEOUT: - raise multiprocessing.TimeoutError() - if args[0] == RUNTIME: - raise Exception() - if args[0] == MOCK_POD_OBJ: - pod_obj = Mock(metadata=Mock()) - pod_obj.metadata.name = DUMMY_POD_NAME - return Mock(items=[pod_obj]) - if args[0] == NO_PODS: - return Mock(items=[]) - return Mock(items=LIST_RESPONSE) - - return MockResponse() - - -def create_job( - command=None, - args=None, - num_workers=2, - env_vars=None, -): - # Handle env_vars as either a dict or a list - if env_vars: - if isinstance(env_vars, dict): - env_vars = [V1EnvVar(name=k, value=v) for k, v in env_vars.items()] - elif isinstance(env_vars, list): - env_vars = [ - v if isinstance(v, V1EnvVar) else V1EnvVar(**v) for v in env_vars - ] - - container = V1Container( - name=constants.PYTORCHJOB_CONTAINER, - image=TEST_IMAGE, - command=command, - args=args, - env=env_vars, - ) - - master = KubeflowOrgV1ReplicaSpec( - replicas=1, - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - pytorch_replica_specs = {"Master": master} - - # PyTorchJob always has 1 master and N-1 worker replicas. 
- if num_workers > 1: - pytorch_replica_specs["Worker"] = KubeflowOrgV1ReplicaSpec( - replicas=num_workers - 1, - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - pytorchjob = KubeflowOrgV1PyTorchJob( - api_version=constants.API_VERSION, - kind=constants.PYTORCHJOB_KIND, - metadata=V1ObjectMeta(name=TEST_NAME, namespace=TEST_NAME), - spec=KubeflowOrgV1PyTorchJobSpec( - run_policy=KubeflowOrgV1RunPolicy(clean_pod_policy=None), - pytorch_replica_specs=pytorch_replica_specs, - ), - ) - - return pytorchjob - - -# Check if actual string contains all elements from the expected list. -class AnyStringWithElementsFromList: - def __init__(self, expected): - self.expected = expected - - def __eq__(self, actual): - return all(e in str(actual) for e in self.expected) - - -def create_job_from_func(num_workers, packages_to_install=None, pip_index_url=None): - - command = constants.DEFAULT_COMMAND - if num_workers > 1: - args = [f'{constants.ENTRYPOINT_TORCH} "$program_path/ephemeral_script.py"'] - else: - args = [f'{constants.ENTRYPOINT_PYTHON} "$program_path/ephemeral_script.py"'] - - if pip_index_url and packages_to_install: - args += [f"--index-url {pip_index_url} {packages_to_install[0]}"] - - job = create_job(command, AnyStringWithElementsFromList(args), num_workers) - - return job - - -def generate_job_with_status( - job: constants.JOB_MODELS_TYPE, - condition_type: str = constants.JOB_CONDITION_SUCCEEDED, -) -> constants.JOB_MODELS_TYPE: - job.status = KubeflowOrgV1JobStatus( - conditions=[ - KubeflowOrgV1JobCondition( - type=condition_type, - status=constants.CONDITION_STATUS_TRUE, - ) - ] - ) - return job - - -class DummyJobClass: - def __init__(self, kind) -> None: - self.kind = kind - - -def generate_pod(status, name=DUMMY_POD_NAME, timestamp=None): - pod = Mock(metadata=Mock()) - pod.metadata.name = name - pod.metadata.creation_timestamp = 
timestamp - pod.status = status - return pod - - -def mock_get_job_pods(*args, **kwargs): - """Mock get_job_pods to return controlled pod objects""" - namespace = kwargs.get("namespace") - if namespace == f"pod {TIMEOUT}": - raise TimeoutError() - if namespace == f"pod {RUNTIME}": - raise RuntimeError() - if namespace == INVALID: - raise ValueError() - - # Handle different test scenarios - if namespace == MULTI_PODS: - return [generate_pod(Mock(phase=RUNNING), f"pod-{i}") for i in range(3)] - - # To find relevant events, the pod's creation time must precede the event's creation time - pod_creation_timestamp = EVENT_CREATION_TIMESTAMP - timedelta(seconds=1) - pod = generate_pod(None, timestamp=pod_creation_timestamp) - if namespace == PENDING_POD: - pod.status = Mock(phase=constants.POD_PHASE_PENDING) - elif namespace == NO_STATUS_POD: - pod.status = None - else: - pod.status = Mock(phase=RUNNING) - return [pod] - - -def mock_get_job(*args, **kwargs): - """Mock get_job_pods to return controlled pod objects""" - namespace = kwargs.get("namespace") - if namespace == f"job {TIMEOUT}": - raise TimeoutError() - if namespace == f"job {RUNTIME}": - raise RuntimeError() - - # Handle different test scenarios - job = Mock() - # To find relevant events, the job's creation time must precede the event's creation time - job.metadata = Mock( - creation_timestamp=EVENT_CREATION_TIMESTAMP - timedelta(seconds=1) - ) - return job - - -def mock_read_namespaced_pod_log(*args, **kwargs): - """Mock for reading pod logs""" - if kwargs.get("namespace") == FAIL_LOGS: - raise Exception("Failed to read logs") - return "test log content" - - -def mock_watch(self, *args, **kwargs): - namespace = kwargs.get("namespace") - if namespace == FAIL_LOGS: - raise Exception("Failed to read logs") - if namespace == QUEUE_TIMEOUT: - log_lines = [TIMEOUT] - elif namespace == QUEUE_EMPTY: - log_lines = [QUEUE_EMPTY] - else: - log_lines = ["line 1 of pod logs", "line 2 of pod logs", "line 3 of pod logs"] - 
return iter(log_lines) - - -def mock_get_log_queue_pool(log_streams): - mock_logs = [] - for stream in log_streams: - # Convert iterator to list to preserve values - log_lines = list(stream) - mock_queue = Mock() - # Use a list to maintain state between calls - remaining_logs = log_lines.copy() # Make a copy to avoid modifying original - - def get_next(timeout, logs=remaining_logs): - if logs: - log = logs.pop(0) - if log == TIMEOUT: - raise TimeoutError - if log == QUEUE_EMPTY: - raise queue.Empty - return log - return None - - mock_queue.get = Mock(side_effect=get_next) - mock_queue.put = Mock() - mock_logs.append(mock_queue) - return mock_logs - - -def mock_list_namespaced_event(*args, **kwargs): - """Mock for listing namespace events""" - - class MockEvent: - def __init__(self, kind, name): - self.involved_object = Mock(kind=kind) - self.involved_object.name = name - self.metadata = Mock(creation_timestamp=EVENT_CREATION_TIMESTAMP) - self.message = f"{kind} Event 1" - - class MockEventList: - def __init__(self): - self.items = [ - MockEvent(constants.POD_KIND, DUMMY_POD_NAME), - MockEvent(constants.PYTORCHJOB_KIND, TEST_NAME), - ] - - if kwargs.get("namespace") == FAIL_EVENTS: - raise Exception("Failed to read events") - return MockEventList() - - -test_data_create_job = [ - ( - "valid flow", - {"job": create_job(), "namespace": TEST_NAME}, - SUCCESS, - create_job(), - ), - ( - "valid flow to create multi-node job with torchrun", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "train_func": lambda: print("Test Training Function"), - "base_image": TEST_IMAGE, - "num_workers": 3, - "packages_to_install": ["boto3==1.34.14"], - "pip_index_url": "https://pypi.custom.com/simple", - }, - SUCCESS, - create_job_from_func( - num_workers=3, - packages_to_install=["boto3==1.34.1"], - pip_index_url="https://pypi.custom.com/simple", - ), - ), - ( - "valid flow to create job with 1 worker", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "train_func": lambda: 
print("Test Training Function"), - "base_image": TEST_IMAGE, - "num_workers": 1, - }, - SUCCESS, - create_job_from_func(num_workers=1), - ), - ( - "valid flow to create job using image", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "base_image": TEST_IMAGE, - "num_workers": 2, - }, - SUCCESS, - create_job(num_workers=2), - ), - ( - "invalid extra parameter", - { - "job": create_job(), - "namespace": TEST_NAME, - "base_image": "test_image", - }, - ValueError, - None, - ), - ( - "invalid job kind", - {"job_kind": "invalid_job_kind"}, - ValueError, - None, - ), - ( - "job name missing with train function", - {"train_func": lambda: "test train function"}, - ValueError, - None, - ), - ( - "job name missing with base image", - {"base_image": "test_image"}, - ValueError, - None, - ), - ( - "uncallable train function", - { - "name": TEST_NAME, - "train_func": "uncallable train function", - }, - ValueError, - None, - ), - ( - "invalid number of workers", - { - "name": TEST_NAME, - "num_workers": 0, - }, - ValueError, - None, - ), - ( - "num_procs_per_worker is set for TFJob", - { - "name": TEST_NAME, - "job_kind": constants.TFJOB_KIND, - "num_procs_per_worker": 5, - "base_image": "test_image", - }, - ValueError, - None, - ), - ( - "num_chief_replicas and num_ps_replicas is set for PyTorchJov", - { - "name": TEST_NAME, - "num_chief_replicas": 1, - "num_ps_replicas": 1, - "base_image": "test_image", - }, - ValueError, - None, - ), - ( - "paddle job can't be created using function", - { - "name": TEST_NAME, - "train_func": lambda: "test train function", - "job_kind": constants.PADDLEJOB_KIND, - }, - ValueError, - None, - ), - ( - "invalid job object", - {"job": DummyJobClass(constants.TFJOB_KIND)}, - ValueError, - None, - ), - ( - "create_namespaced_custom_object timeout error", - {"job": create_job(), "namespace": TIMEOUT}, - TimeoutError, - None, - ), - ( - "create_namespaced_custom_object runtime error", - {"job": create_job(), "namespace": RUNTIME}, - RuntimeError, 
- None, - ), - ( - "valid flow with env_vars as dict", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "env_vars": {"ENV_VAR": "env_value"}, - "base_image": TEST_IMAGE, - "num_workers": 1, - }, - SUCCESS, - create_job(env_vars={"ENV_VAR": "env_value"}, num_workers=1), - ), - ( - "valid flow with env_vars as list", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "env_vars": [V1EnvVar(name="ENV_VAR", value="env_value")], - "base_image": TEST_IMAGE, - "num_workers": 2, - }, - SUCCESS, - create_job( - env_vars=[V1EnvVar(name="ENV_VAR", value="env_value")], num_workers=2 - ), - ), -] - -test_data_get_job_pods = [ - ( - "valid flow with default namespace and default timeout", - { - "name": TEST_NAME, - }, - f"{constants.JOB_NAME_LABEL}={TEST_NAME}", - LIST_RESPONSE, - ), - ( - "invalid replica_type", - {"name": TEST_NAME, "replica_type": "invalid_replica_type"}, - "Label not relevant", - ValueError, - ), - ( - "invalid replica_type (uppercase)", - {"name": TEST_NAME, "replica_type": constants.REPLICA_TYPE_WORKER}, - "Label not relevant", - ValueError, - ), - ( - "valid flow with specific timeout, replica_index, replica_type and master role", - { - "name": TEST_NAME, - "namespace": "test_namespace", - "timeout": 60, - "is_master": True, - "replica_type": constants.REPLICA_TYPE_MASTER.lower(), - "replica_index": 0, - }, - f"{constants.JOB_NAME_LABEL}={TEST_NAME}," - f"{constants.JOB_ROLE_LABEL}={constants.JOB_ROLE_MASTER}," - f"{constants.REPLICA_TYPE_LABEL}={constants.REPLICA_TYPE_MASTER.lower()}," - f"{constants.REPLICA_INDEX_LABEL}=0", - LIST_RESPONSE, - ), - ( - "invalid flow with TimeoutError", - { - "name": TEST_NAME, - "namespace": TIMEOUT, - }, - "Label not relevant", - TimeoutError, - ), - ( - "invalid flow with RuntimeError", - { - "name": TEST_NAME, - "namespace": RUNTIME, - }, - "Label not relevant", - RuntimeError, - ), -] - -test_data_wait_for_job_conditions = [ - ( - "timeout waiting for succeeded condition", - { - "name": TEST_NAME, - "namespace": 
TIMEOUT, - "wait_timeout": 0, - }, - TimeoutError, - ), - ( - "invalid expected condition", - { - "name": TEST_NAME, - "namespace": "value", - "expected_conditions": {"invalid"}, - }, - ValueError, - ), - ( - "invalid expected condition(lowercase)", - { - "name": TEST_NAME, - "namespace": "value", - "expected_conditions": {"succeeded"}, - }, - ValueError, - ), - ( - "job failed unexpectedly", - { - "name": TEST_NAME, - "namespace": RUNTIME, - }, - RuntimeError, - ), - ( - "valid case", - { - "name": TEST_NAME, - "namespace": "test-namespace", - }, - generate_job_with_status(create_job()), - ), - ( - "valid case with specified callback", - { - "name": TEST_NAME, - "namespace": "test-namespace", - "callback": lambda job: "test train function", - }, - generate_job_with_status(create_job()), - ), -] - - -test_data_get_job_pod_names = [ - ( - "valid flow", - { - "name": TEST_NAME, - "namespace": MOCK_POD_OBJ, - }, - [DUMMY_POD_NAME], - ), - ( - "valid flow with no pods available", - { - "name": TEST_NAME, - "namespace": NO_PODS, - }, - [], - ), -] - - -test_data_update_job = [ - ( - "valid flow", - { - "name": TEST_NAME, - "job": create_job(), - }, - "No output", - ), - ( - "invalid job_kind", - { - "name": TEST_NAME, - "job": create_job(), - "job_kind": "invalid_job_kind", - }, - ValueError, - ), - ( - "invalid flow with TimeoutError", - { - "name": TEST_NAME, - "namespace": TIMEOUT, - "job": create_job(), - }, - TimeoutError, - ), - ( - "invalid flow with RuntimeError", - { - "name": TEST_NAME, - "namespace": RUNTIME, - "job": create_job(), - }, - RuntimeError, - ), -] - -test_data_get_job = [ - ( - "valid flow with all parameters set", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "job_kind": constants.PYTORCHJOB_KIND, - "timeout": 120, - }, - SUCCESS, - ), - ( - "invalid flow with default namespace and a Job that doesn't exist", - {"name": TEST_NAME, "job_kind": constants.TFJOB_KIND}, - RuntimeError, - ), - ( - "invalid flow incorrect parameter", - {"name": 
TEST_NAME, "test": "example"}, - TypeError, - ), - ( - "invalid flow with incorrect value", - {"name": TEST_NAME, "job_kind": INVALID}, - ValueError, - ), - ( - "runtime error case", - { - "name": TEST_NAME, - "namespace": "runtime", - "job_kind": constants.PYTORCHJOB_KIND, - }, - RuntimeError, - ), - ( - "invalid flow with timeout error", - {"name": TEST_NAME, "namespace": TIMEOUT}, - TimeoutError, - ), -] - -test_data_list_jobs = [ - ( - "valid flow with default namespace and default timeout", - {}, - SUCCESS, - [generate_job_with_status(create_job())], - ), - ( - "valid flow with all parameters set", - { - "namespace": TEST_NAME, - "job_kind": constants.PYTORCHJOB_KIND, - "timeout": 120, - }, - SUCCESS, - [generate_job_with_status(create_job())], - ), - ( - "valid flow with empty job list", - { - "namespace": "empty-namespace", - }, - SUCCESS, - [], - ), - ( - "valid flow with multiple jobs", - { - "namespace": "multi-jobs", - }, - SUCCESS, - [ - generate_job_with_status(create_job()), - generate_job_with_status(create_job()), - ], - ), - ( - "invalid flow with default namespace and a Job that doesn't exist", - {"job_kind": constants.TFJOB_KIND}, - RuntimeError, - None, - ), - ( - "invalid flow with incorrect parameter", - {"test": "example"}, - TypeError, - None, - ), - ( - "invalid flow with incorrect job_kind value", - {"job_kind": "FailJob"}, - ValueError, - None, - ), - ( - "runtime error case", - {"namespace": RUNTIME}, - RuntimeError, - None, - ), - ( - "timeout error case", - {"namespace": TIMEOUT}, - TimeoutError, - None, - ), -] - - -test_data_delete_job = [ - ( - "valid flow with default namespace", - { - "name": TEST_NAME, - }, - SUCCESS, - ), - ( - "invalid extra parameter", - {"name": TEST_NAME, "namespace": TEST_NAME, "example": "test"}, - TypeError, - ), - ( - "invalid job kind", - {"name": TEST_NAME, "job_kind": "invalid_job_kind"}, - RuntimeError, - ), - ( - "job name missing", - {"namespace": TEST_NAME, "job_kind": constants.PYTORCHJOB_KIND}, 
- TypeError, - ), - ( - "delete_namespaced_custom_object timeout error", - {"name": TEST_NAME, "namespace": TIMEOUT}, - TimeoutError, - ), - ( - "delete_namespaced_custom_object runtime error", - {"name": TEST_NAME, "namespace": RUNTIME}, - RuntimeError, - ), - ( - "valid flow", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "job_kind": constants.PYTORCHJOB_KIND, - }, - SUCCESS, - ), - ( - "valid flow with delete options", - { - "name": TEST_NAME, - "delete_options": V1DeleteOptions(grace_period_seconds=30), - }, - SUCCESS, - ), -] - - -test_data_get_job_conditions = [ - ( - "valid flow with failed job condition", - {"name": TEST_NAME, "namespace": FAILED}, - generate_job_with_status( - create_job(), condition_type=constants.JOB_CONDITION_FAILED - ), - ), - ( - "valid flow with restarting job condition", - {"name": TEST_NAME, "namespace": RESTARTING}, - generate_job_with_status( - create_job(), condition_type=constants.JOB_CONDITION_RESTARTING - ), - ), - ( - "valid flow with running job condition", - {"name": TEST_NAME, "namespace": RUNNING}, - generate_job_with_status( - create_job(), condition_type=constants.JOB_CONDITION_RUNNING - ), - ), - ( - "valid flow with created job condition", - {"name": TEST_NAME, "namespace": CREATED}, - generate_job_with_status( - create_job(), condition_type=constants.JOB_CONDITION_CREATED - ), - ), - ( - "valid flow with all parameters set", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "job": create_job(), - "job_kind": constants.PYTORCHJOB_KIND, - "timeout": 120, - }, - generate_job_with_status(create_job()), - ), - ( - "invalid flow with default namespace and a Job that doesn't exist", - {"name": TEST_NAME, "job_kind": constants.TFJOB_KIND}, - RuntimeError, - ), - ( - "invalid flow incorrect parameter", - {"name": TEST_NAME, "test": "example"}, - TypeError, - ), - ( - "invalid flow with incorrect value", - {"name": TEST_NAME, "job_kind": INVALID}, - ValueError, - ), - ( - "runtime error case", - { - "name": 
TEST_NAME, - "namespace": "runtime", - "job_kind": constants.PYTORCHJOB_KIND, - }, - RuntimeError, - ), - ( - "invalid flow with timeout error", - {"name": TEST_NAME, "namespace": TIMEOUT}, - TimeoutError, - ), -] - - -test_data_is_job_created = [ - ( - "valid flow with all parameters set", - { - "name": TEST_NAME, - "namespace": CREATED, - "job": create_job(), - "job_kind": constants.PYTORCHJOB_KIND, - "timeout": 120, - }, - True, - ), - ( - "invalid flow with default namespace and a Job that doesn't exist", - {"name": TEST_NAME, "job_kind": constants.TFJOB_KIND}, - RuntimeError, - ), - ( - "invalid flow incorrect parameter", - {"name": TEST_NAME, "test": "example"}, - TypeError, - ), - ( - "invalid flow with incorrect value", - {"name": TEST_NAME, "job_kind": INVALID}, - ValueError, - ), - ( - "runtime error case", - { - "name": TEST_NAME, - "namespace": "runtime", - "job_kind": constants.PYTORCHJOB_KIND, - }, - RuntimeError, - ), - ( - "invalid flow with timeout error", - {"name": TEST_NAME, "namespace": TIMEOUT}, - TimeoutError, - ), -] - - -test_data_is_job_running = [ - ( - "valid flow with job that is running", - {"name": TEST_NAME, "namespace": RUNNING}, - True, - ), - ( - "valid flow with all parameters set", - { - "name": TEST_NAME, - "namespace": RUNNING, - "job": create_job(), - "job_kind": constants.PYTORCHJOB_KIND, - "timeout": 120, - }, - True, - ), - ( - "invalid flow with default namespace and a Job that doesn't exist", - {"name": TEST_NAME, "job_kind": constants.TFJOB_KIND}, - RuntimeError, - ), - ( - "invalid flow incorrect parameter", - {"name": TEST_NAME, "test": "example"}, - TypeError, - ), - ( - "invalid flow with incorrect value", - {"name": TEST_NAME, "job_kind": INVALID}, - ValueError, - ), - ( - "runtime error case", - { - "name": TEST_NAME, - "namespace": "runtime", - "job_kind": constants.PYTORCHJOB_KIND, - }, - RuntimeError, - ), - ( - "invalid flow with timeout error", - {"name": TEST_NAME, "namespace": TIMEOUT}, - TimeoutError, - ), 
-] - - -test_data_is_job_restarting = [ - ( - "valid flow with job that is restarting", - {"name": TEST_NAME, "namespace": RESTARTING}, - True, - ), - ( - "valid flow with all parameters set", - { - "name": TEST_NAME, - "namespace": RESTARTING, - "job": create_job(), - "job_kind": constants.PYTORCHJOB_KIND, - "timeout": 120, - }, - True, - ), - ( - "invalid flow with default namespace and a Job that doesn't exist", - {"name": TEST_NAME, "job_kind": constants.TFJOB_KIND}, - RuntimeError, - ), - ( - "invalid flow incorrect parameter", - {"name": TEST_NAME, "test": "example"}, - TypeError, - ), - ( - "invalid flow with incorrect value", - {"name": TEST_NAME, "job_kind": INVALID}, - ValueError, - ), - ( - "runtime error case", - { - "name": TEST_NAME, - "namespace": "runtime", - "job_kind": constants.PYTORCHJOB_KIND, - }, - RuntimeError, - ), - ( - "invalid flow with timeout error", - {"name": TEST_NAME, "namespace": TIMEOUT}, - TimeoutError, - ), -] - - -test_data_is_job_failed = [ - ( - "valid flow with job that is failed", - {"name": TEST_NAME, "namespace": FAILED}, - True, - ), - ( - "valid flow with all parameters set", - { - "name": TEST_NAME, - "namespace": FAILED, - "job": create_job(), - "job_kind": constants.PYTORCHJOB_KIND, - "timeout": 120, - }, - True, - ), - ( - "invalid flow with default namespace and a Job that doesn't exist", - {"name": TEST_NAME, "job_kind": constants.TFJOB_KIND}, - RuntimeError, - ), - ( - "invalid flow incorrect parameter", - {"name": TEST_NAME, "test": "example"}, - TypeError, - ), - ( - "invalid flow with incorrect value", - {"name": TEST_NAME, "job_kind": INVALID}, - ValueError, - ), - ( - "runtime error case", - { - "name": TEST_NAME, - "namespace": "runtime", - "job_kind": constants.PYTORCHJOB_KIND, - }, - RuntimeError, - ), - ( - "invalid flow with timeout error", - {"name": TEST_NAME, "namespace": TIMEOUT}, - TimeoutError, - ), -] - - -test_data_is_job_succeded = [ - ( - "valid flow with all parameters set", - { - "name": 
TEST_NAME, - "namespace": SUCCEEDED, - "job": create_job(), - "job_kind": constants.PYTORCHJOB_KIND, - "timeout": 120, - }, - True, - ), - ( - "invalid flow with default namespace and a Job that doesn't exist", - {"name": TEST_NAME, "job_kind": constants.TFJOB_KIND}, - RuntimeError, - ), - ( - "invalid flow incorrect parameter", - {"name": TEST_NAME, "test": "example"}, - TypeError, - ), - ( - "invalid flow with incorrect value", - {"name": TEST_NAME, "job_kind": INVALID}, - ValueError, - ), - ( - "runtime error case", - { - "name": TEST_NAME, - "namespace": "runtime", - "job_kind": constants.PYTORCHJOB_KIND, - }, - RuntimeError, - ), - ( - "invalid flow with timeout error", - {"name": TEST_NAME, "namespace": TIMEOUT}, - TimeoutError, - ), -] - -test_data_get_job_logs = [ - # Basic cases - ( - "valid flow with default parameters", - { - "name": TEST_NAME, - }, - {DUMMY_POD_NAME: "test log content"}, - {}, - SUCCESS, - ), - ( - "pod with pending status", - { - "name": TEST_NAME, - "namespace": PENDING_POD, - }, - {}, # No logs expected for pending pods - {}, - SUCCESS, - ), - ( - "pod with pending status and follow", - { - "name": TEST_NAME, - "namespace": PENDING_POD, - "follow": True, - }, - {}, # No logs expected for pending pods - {}, - SUCCESS, - ), - ( - "pod with no status", - { - "name": TEST_NAME, - "namespace": NO_STATUS_POD, - }, - {}, # No logs expected - {}, - SUCCESS, - ), - ( - "pod with no status and follow", - { - "name": TEST_NAME, - "namespace": NO_STATUS_POD, - "follow": True, - }, - {}, # No logs expected - {}, - SUCCESS, - ), - ( - "valid flow with logs and verbose", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "verbose": True, - }, - {DUMMY_POD_NAME: "test log content"}, - { - f"{constants.PYTORCHJOB_KIND.lower()}/{TEST_NAME}": [ - f"{EVENT_CREATION_TIMESTAMP.strftime('%Y-%m-%d %H:%M:%S')} PyTorchJob Event 1" - ], - f"{constants.POD_KIND.lower()}/{DUMMY_POD_NAME}": [ - f"{EVENT_CREATION_TIMESTAMP.strftime('%Y-%m-%d %H:%M:%S')} Pod 
Event 1" - ], - }, - SUCCESS, - ), - ( - "valid flow with worker logs", - { - "name": TEST_NAME, - "namespace": TEST_NAME, - "is_master": False, - "replica_type": constants.REPLICA_TYPE_WORKER.lower(), - "replica_index": 0, - }, - {DUMMY_POD_NAME: "test log content"}, - {}, - SUCCESS, - ), - # Streaming cases - ( - "valid flow with follow logs", - { - "name": TEST_NAME, - "follow": True, - }, - { - DUMMY_POD_NAME: ( - "line 1 of pod logs" "line 2 of pod logs" "line 3 of pod logs" - ) - }, - {}, - SUCCESS, - ), - ( - "valid flow with follow logs and multiple pods", - { - "name": TEST_NAME, - "namespace": MULTI_PODS, - "follow": True, - }, - { - "pod-0": ("line 1 of pod logs" "line 2 of pod logs" "line 3 of pod logs"), - "pod-1": ("line 1 of pod logs" "line 2 of pod logs" "line 3 of pod logs"), - "pod-2": ("line 1 of pod logs" "line 2 of pod logs" "line 3 of pod logs"), - }, - {}, - SUCCESS, - ), - ( - "follow logs with queue empty", - { - "name": TEST_NAME, - "namespace": QUEUE_EMPTY, - "follow": True, - }, - {}, - {}, - SUCCESS, - ), - # Error cases - ( - "invalid replica type", - { - "name": TEST_NAME, - "namespace": INVALID, - "replica_type": "invalid_replica", - }, - None, - None, - ValueError, - ), - ( - "timeout error when getting pods", - { - "name": TEST_NAME, - "namespace": f"pod {TIMEOUT}", - }, - None, - None, - TimeoutError, - ), - ( - "runtime error when getting pods", - { - "name": TEST_NAME, - "namespace": f"pod {RUNTIME}", - }, - None, - None, - RuntimeError, - ), - ( - "exception when reading logs with follow", - { - "name": TEST_NAME, - "namespace": FAIL_LOGS, - "follow": True, - }, - None, - None, - Exception, - ), - ( - "runtime error when reading logs", - { - "name": TEST_NAME, - "namespace": FAIL_LOGS, - }, - None, - None, - RuntimeError, - ), - ( - "exception when reading events", - { - "name": TEST_NAME, - "namespace": FAIL_EVENTS, - "verbose": True, - }, - None, - None, - Exception, - ), - ( - "timeout error when getting job", - { - "name": 
TEST_NAME, - "namespace": f"job {TIMEOUT}", - "verbose": True, - }, - None, - None, - TimeoutError, - ), - ( - "runtime error when getting job", - { - "name": TEST_NAME, - "namespace": f"job {RUNTIME}", - "verbose": True, - }, - None, - None, - RuntimeError, - ), - ( - "follow logs with queue timeout", - { - "name": TEST_NAME, - "namespace": QUEUE_TIMEOUT, - "follow": True, - }, - {}, # Empty logs due to timeout - {}, - TimeoutError, - ), -] - - -@pytest.fixture -def training_client(request): - mock_get_job_and_job_pods = ( - request.param.get("mock_get_job_and_job_pods", False) - if hasattr(request, "param") - else False - ) - with patch( - "kubernetes.client.CustomObjectsApi", - return_value=Mock( - create_namespaced_custom_object=Mock(side_effect=conditional_error_handler), - patch_namespaced_custom_object=Mock(side_effect=conditional_error_handler), - delete_namespaced_custom_object=Mock(side_effect=conditional_error_handler), - get_namespaced_custom_object=Mock( - side_effect=get_namespaced_custom_object_response - ), - list_namespaced_custom_object=Mock( - side_effect=list_namespaced_custom_object_response - ), - ), - ), patch( - "kubernetes.client.CoreV1Api", - return_value=Mock( - list_namespaced_pod=Mock(side_effect=list_namespaced_pod_response), - read_namespaced_pod_log=Mock(side_effect=mock_read_namespaced_pod_log), - list_namespaced_event=Mock(side_effect=mock_list_namespaced_event), - ), - ), patch( - "kubernetes.config.load_kube_config", return_value=Mock() - ), patch( - "kubernetes.watch.Watch", - return_value=Mock( - stream=Mock(side_effect=mock_watch), - ), - ), patch( - "kubeflow.training.utils.utils.get_log_queue_pool", - side_effect=mock_get_log_queue_pool, - ): - client = TrainingClient(job_kind=constants.PYTORCHJOB_KIND) - if mock_get_job_and_job_pods: - client.get_job_pods = Mock(side_effect=mock_get_job_pods) - client.get_job = Mock(side_effect=mock_get_job) - yield client - - -@pytest.mark.parametrize( - 
"test_name,kwargs,expected_output,expected_job", test_data_create_job -) -def test_create_job(training_client, test_name, kwargs, expected_output, expected_job): - """ - test create_job function of training client - """ - print("Executing test:", test_name) - try: - training_client.create_job(**kwargs) - - assert expected_output == SUCCESS - - training_client.custom_api.create_namespaced_custom_object.assert_called_with( - constants.GROUP, - constants.VERSION, - kwargs["namespace"], - constants.JOB_PARAMETERS[constants.PYTORCHJOB_KIND]["plural"], - expected_job, - ) - except Exception as e: - assert type(e) is expected_output - print("test execution complete") - - -@pytest.mark.parametrize( - "test_name,kwargs,expected_label_selector,expected_output", - test_data_get_job_pods, -) -def test_get_job_pods( - training_client, test_name, kwargs, expected_label_selector, expected_output -): - """ - test get_job_pods function of training client - """ - print("Executing test:", test_name) - try: - out = training_client.get_job_pods(**kwargs) - # Verify that list_namespaced_pod called with specified arguments - training_client.core_api.list_namespaced_pod.assert_called_with( - kwargs.get("namespace", constants.DEFAULT_NAMESPACE), - label_selector=expected_label_selector, - async_req=True, - ) - assert out[0].pop(TIMEOUT) == kwargs.get(TIMEOUT, constants.DEFAULT_TIMEOUT) - assert out == expected_output - except Exception as e: - assert type(e) is expected_output - print("test execution complete") - - -@pytest.mark.parametrize( - "test_name,kwargs,expected_output", - test_data_get_job_pod_names, -) -def test_get_job_pod_names(training_client, test_name, kwargs, expected_output): - """ - test get_job_pod_names function of training client - """ - print("Executing test:", test_name) - out = training_client.get_job_pod_names(**kwargs) - assert out == expected_output - print("test execution complete") - - -@pytest.mark.parametrize("test_name,kwargs,expected_output", 
test_data_update_job) -def test_update_job(training_client, test_name, kwargs, expected_output): - """ - test update_job function of training client - """ - print("Executing test:", test_name) - try: - training_client.update_job(**kwargs) - training_client.custom_api.patch_namespaced_custom_object.assert_called_with( - constants.GROUP, - constants.VERSION, - kwargs.get("namespace", constants.DEFAULT_NAMESPACE), - constants.JOB_PARAMETERS[kwargs.get("job_kind", training_client.job_kind)][ - "plural" - ], - kwargs.get("name"), - kwargs.get("job"), - ) - except Exception as e: - assert type(e) is expected_output - print("test execution complete") - - -@pytest.mark.parametrize( - "test_name,kwargs,expected_output", test_data_wait_for_job_conditions -) -def test_wait_for_job_conditions(training_client, test_name, kwargs, expected_output): - """ - test wait_for_job_conditions function of training client - """ - print("Executing test:", test_name) - try: - out = training_client.wait_for_job_conditions(**kwargs) - assert out == expected_output - except Exception as e: - assert type(e) is expected_output - print("test execution complete") - - -@pytest.mark.parametrize("test_name,kwargs,expected_output", test_data_delete_job) -def test_delete_job(training_client, test_name, kwargs, expected_output): - """ - test delete_job function of training client - """ - print("Executing test: ", test_name) - try: - training_client.delete_job(**kwargs) - assert expected_output == SUCCESS - except Exception as e: - assert type(e) is expected_output - - print("test execution complete") - - -@pytest.mark.parametrize("test_name,kwargs,expected_output", test_data_get_job) -def test_get_job(training_client, test_name, kwargs, expected_output): - """ - test get_job function of training client - """ - print("Executing test: ", test_name) - - try: - training_client.get_job(**kwargs) - assert expected_output == SUCCESS - except Exception as e: - assert type(e) is expected_output - - print("test 
execution complete") - - -@pytest.mark.parametrize( - "test_name,kwargs,expected_output", test_data_get_job_conditions -) -def test_get_job_conditions(training_client, test_name, kwargs, expected_output): - """ - test get_job_conditions function of training client - """ - print("Executing test: ", test_name) - - try: - training_client.get_job_conditions(**kwargs) - if kwargs.get("namespace") is TEST_NAME: - assert expected_output == generate_job_with_status(create_job()) - else: - assert expected_output == generate_job_with_status( - create_job(), condition_type=kwargs.get("namespace") - ) - except Exception as e: - assert type(e) is expected_output - - print("test execution complete") - - -@pytest.mark.parametrize("test_name,kwargs,expected_output", test_data_is_job_created) -def test_is_job_created(training_client, test_name, kwargs, expected_output): - """ - test is_job_created function of training client - """ - print("Executing test: ", test_name) - - try: - training_client.is_job_created(**kwargs) - if kwargs.get("namespace") is not (CREATED or RUNTIME or TIMEOUT): - assert expected_output is False - else: - assert expected_output is True - except Exception as e: - assert type(e) is expected_output - - print("test execution complete") - - -@pytest.mark.parametrize("test_name,kwargs,expected_output", test_data_is_job_running) -def test_is_job_running(training_client, test_name, kwargs, expected_output): - """ - test is_job_running function of training client - """ - print("Executing test: ", test_name) - - try: - training_client.is_job_running(**kwargs) - if kwargs.get("namespace") is not (RUNNING or RUNTIME or TIMEOUT): - assert expected_output is False - else: - assert expected_output is True - except Exception as e: - assert type(e) is expected_output - - print("test execution complete") - - -@pytest.mark.parametrize( - "test_name,kwargs,expected_output", test_data_is_job_restarting -) -def test_is_job_restarting(training_client, test_name, kwargs, 
expected_output): - """ - test is_is_job_restarting function of training client - """ - print("Executing test: ", test_name) - - try: - training_client.is_job_restarting(**kwargs) - if kwargs.get("namespace") is not (RESTARTING or RUNTIME or TIMEOUT): - assert expected_output is False - else: - assert expected_output is True - except Exception as e: - assert type(e) is expected_output - - print("test execution complete") - - -@pytest.mark.parametrize("test_name,kwargs,expected_output", test_data_is_job_failed) -def test_is_job_failed(training_client, test_name, kwargs, expected_output): - """ - test is_is_job_failed function of training client - """ - print("Executing test: ", test_name) - - try: - training_client.is_job_failed(**kwargs) - if kwargs.get("namespace") is not (FAILED or RUNTIME or TIMEOUT): - assert expected_output is False - else: - assert expected_output is True - except Exception as e: - assert type(e) is expected_output - - print("test execution complete") - - -@pytest.mark.parametrize("test_name,kwargs,expected_output", test_data_is_job_succeded) -def test_is_job_succeeded(training_client, test_name, kwargs, expected_output): - """ - test is_job_succeeded function of training client - """ - print("Executing test: ", test_name) - - try: - training_client.is_job_succeeded(**kwargs) - if kwargs.get("namespace") is not (SUCCEEDED or RUNTIME or TIMEOUT): - assert expected_output is False - else: - assert expected_output is True - except Exception as e: - assert type(e) is expected_output - - print("test execution complete") - - -@pytest.mark.parametrize( - "test_name,kwargs,expected_status,expected_response", test_data_list_jobs -) -def test_list_jobs( - training_client, test_name, kwargs, expected_status, expected_response -): - """ - test list_jobs function of training client - """ - print("Executing test: ", test_name) - try: - resp = training_client.list_jobs(**kwargs) - assert expected_status == SUCCESS - assert isinstance(resp, list) - assert 
len(resp) == len(expected_response) - for actual_job, expected_job in zip(resp, expected_response): - assert actual_job.to_dict() == expected_job.to_dict() - except Exception as e: - assert type(e) is expected_status - - print("test execution complete") - - -@pytest.mark.parametrize( - "training_client", [{"mock_get_job_and_job_pods": True}], indirect=True -) -@pytest.mark.parametrize( - "test_name,kwargs,expected_logs,expected_events,expected_output", - test_data_get_job_logs, -) -def test_get_job_logs( - training_client, - test_name, - kwargs, - expected_logs, - expected_events, - expected_output, -): - """ - test get_job_logs function of training client - """ - print("Executing test:", test_name) - - try: - logs_dict, events_dict = training_client.get_job_logs(**kwargs) - - assert expected_output == SUCCESS - assert logs_dict == expected_logs - - if kwargs.get("verbose", False): - assert events_dict == expected_events - else: - assert events_dict == {} - - # Verify API calls - training_client.get_job_pods.assert_called_with( - name=kwargs["name"], - namespace=kwargs.get("namespace", constants.DEFAULT_NAMESPACE), - is_master=kwargs.get("is_master", True), - replica_type=kwargs.get("replica_type"), - replica_index=kwargs.get("replica_index"), - timeout=kwargs.get("timeout", constants.DEFAULT_TIMEOUT), - ) - - if kwargs.get("verbose", False): - training_client.get_job.assert_called_with( - name=kwargs["name"], - namespace=kwargs.get("namespace", constants.DEFAULT_NAMESPACE), - ) - training_client.core_api.list_namespaced_event.assert_called_with( - namespace=kwargs.get("namespace", constants.DEFAULT_NAMESPACE) - ) - - except Exception as e: - assert type(e) is expected_output - - print("test execution complete") diff --git a/sdk/python/kubeflow/training/api_client.py b/sdk/python/kubeflow/training/api_client.py deleted file mode 100644 index 52d67804b0..0000000000 --- a/sdk/python/kubeflow/training/api_client.py +++ /dev/null @@ -1,666 +0,0 @@ -# coding: utf-8 -""" 
- Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - -from __future__ import absolute_import - -import atexit -import datetime -from dateutil.parser import parse -import json -import mimetypes -from multiprocessing.pool import ThreadPool -import os -import re -import tempfile - -# python 2 and python 3 compatibility library -import six -from six.moves.urllib.parse import quote - -from kubeflow.training.configuration import Configuration -import kubeflow.training.models -from kubeflow.training import rest -from kubeflow.training.exceptions import ApiValueError, ApiException - - -class ApiClient(object): - """Generic API client for OpenAPI client library builds. - - OpenAPI generic API client. This client handles the client- - server communication, and is invariant across implementations. Specifics of - the methods and models for each application are generated from the OpenAPI - templates. - - NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - Do not edit the class manually. - - :param configuration: .Configuration object for this client - :param header_name: a header to pass when making calls to the API. - :param header_value: a header value to pass when making calls to - the API. - :param cookie: a cookie to include in the header when making calls - to the API - :param pool_threads: The number of threads to use for async requests - to the API. More threads means more concurrent API requests. 
- """ - - PRIMITIVE_TYPES = (float, bool, bytes, six.text_type) + six.integer_types - NATIVE_TYPES_MAPPING = { - 'int': int, - 'long': int if six.PY3 else long, # noqa: F821 - 'float': float, - 'str': str, - 'bool': bool, - 'date': datetime.date, - 'datetime': datetime.datetime, - 'object': object, - } - _pool = None - - def __init__(self, configuration=None, header_name=None, header_value=None, - cookie=None, pool_threads=1): - if configuration is None: - configuration = Configuration.get_default_copy() - self.configuration = configuration - self.pool_threads = pool_threads - - self.rest_client = rest.RESTClientObject(configuration) - self.default_headers = {} - if header_name is not None: - self.default_headers[header_name] = header_value - self.cookie = cookie - # Set default User-Agent. - self.user_agent = 'OpenAPI-Generator/1.7.0/python' - self.client_side_validation = configuration.client_side_validation - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.close() - - def close(self): - if self._pool: - self._pool.close() - self._pool.join() - self._pool = None - if hasattr(atexit, 'unregister'): - atexit.unregister(self.close) - - @property - def pool(self): - """Create thread pool on first request - avoids instantiating unused threadpool for blocking clients. 
- """ - if self._pool is None: - atexit.register(self.close) - self._pool = ThreadPool(self.pool_threads) - return self._pool - - @property - def user_agent(self): - """User agent for this API client""" - return self.default_headers['User-Agent'] - - @user_agent.setter - def user_agent(self, value): - self.default_headers['User-Agent'] = value - - def set_default_header(self, header_name, header_value): - self.default_headers[header_name] = header_value - - def __call_api( - self, resource_path, method, path_params=None, - query_params=None, header_params=None, body=None, post_params=None, - files=None, response_type=None, auth_settings=None, - _return_http_data_only=None, collection_formats=None, - _preload_content=True, _request_timeout=None, _host=None): - - config = self.configuration - - # header parameters - header_params = header_params or {} - header_params.update(self.default_headers) - if self.cookie: - header_params['Cookie'] = self.cookie - if header_params: - header_params = self.sanitize_for_serialization(header_params) - header_params = dict(self.parameters_to_tuples(header_params, - collection_formats)) - - # path parameters - if path_params: - path_params = self.sanitize_for_serialization(path_params) - path_params = self.parameters_to_tuples(path_params, - collection_formats) - for k, v in path_params: - # specified safe chars, encode everything - resource_path = resource_path.replace( - '{%s}' % k, - quote(str(v), safe=config.safe_chars_for_path_param) - ) - - # query parameters - if query_params: - query_params = self.sanitize_for_serialization(query_params) - query_params = self.parameters_to_tuples(query_params, - collection_formats) - - # post parameters - if post_params or files: - post_params = post_params if post_params else [] - post_params = self.sanitize_for_serialization(post_params) - post_params = self.parameters_to_tuples(post_params, - collection_formats) - post_params.extend(self.files_parameters(files)) - - # auth setting - 
self.update_params_for_auth(header_params, query_params, auth_settings) - - # body - if body: - body = self.sanitize_for_serialization(body) - - # request url - if _host is None: - url = self.configuration.host + resource_path - else: - # use server/host defined in path or operation instead - url = _host + resource_path - - try: - # perform request and return response - response_data = self.request( - method, url, query_params=query_params, headers=header_params, - post_params=post_params, body=body, - _preload_content=_preload_content, - _request_timeout=_request_timeout) - except ApiException as e: - e.body = e.body.decode('utf-8') if six.PY3 else e.body - raise e - - content_type = response_data.getheader('content-type') - - self.last_response = response_data - - return_data = response_data - - if not _preload_content: - return return_data - - if six.PY3 and response_type not in ["file", "bytes"]: - match = None - if content_type is not None: - match = re.search(r"charset=([a-zA-Z\-\d]+)[\s\;]?", content_type) - encoding = match.group(1) if match else "utf-8" - response_data.data = response_data.data.decode(encoding) - - # deserialize response data - if response_type: - return_data = self.deserialize(response_data, response_type) - else: - return_data = None - - if _return_http_data_only: - return (return_data) - else: - return (return_data, response_data.status, - response_data.getheaders()) - - def sanitize_for_serialization(self, obj): - """Builds a JSON POST object. - - If obj is None, return None. - If obj is str, int, long, float, bool, return directly. - If obj is datetime.datetime, datetime.date - convert to string in iso8601 format. - If obj is list, sanitize each element in the list. - If obj is dict, return the dict. - If obj is OpenAPI model, return the properties dict. - - :param obj: The data to serialize. - :return: The serialized form of data. 
- """ - if obj is None: - return None - elif isinstance(obj, self.PRIMITIVE_TYPES): - return obj - elif isinstance(obj, list): - return [self.sanitize_for_serialization(sub_obj) - for sub_obj in obj] - elif isinstance(obj, tuple): - return tuple(self.sanitize_for_serialization(sub_obj) - for sub_obj in obj) - elif isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - - if isinstance(obj, dict): - obj_dict = obj - else: - # Convert model obj to dict except - # attributes `openapi_types`, `attribute_map` - # and attributes which value is not None. - # Convert attribute name to json key in - # model definition for request. - obj_dict = {obj.attribute_map[attr]: getattr(obj, attr) - for attr, _ in six.iteritems(obj.openapi_types) - if getattr(obj, attr) is not None} - - return {key: self.sanitize_for_serialization(val) - for key, val in six.iteritems(obj_dict)} - - def deserialize(self, response, response_type): - """Deserializes response into an object. - - :param response: RESTResponse object to be deserialized. - :param response_type: class literal for - deserialized object, or string of class name. - - :return: deserialized object. - """ - # handle file downloading - # save response body into a tmp file and return the instance - if response_type == "file": - return self.__deserialize_file(response) - - # fetch data from response object - try: - data = json.loads(response.data) - except ValueError: - data = response.data - - return self.__deserialize(data, response_type) - - def __deserialize(self, data, klass): - """Deserializes dict, list, str into an object. - - :param data: dict, list or str. - :param klass: class literal, or string of class name. - - :return: object. 
- """ - if data is None: - return None - - if type(klass) == str: - if klass.startswith('list['): - sub_kls = re.match(r'list\[(.*)\]', klass).group(1) - return [self.__deserialize(sub_data, sub_kls) - for sub_data in data] - - if klass.startswith('dict('): - sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2) - return {k: self.__deserialize(v, sub_kls) - for k, v in six.iteritems(data)} - - # convert str to class - if klass in self.NATIVE_TYPES_MAPPING: - klass = self.NATIVE_TYPES_MAPPING[klass] - else: - klass = getattr(kubeflow.training.models, klass) - - if klass in self.PRIMITIVE_TYPES: - return self.__deserialize_primitive(data, klass) - elif klass == object: - return self.__deserialize_object(data) - elif klass == datetime.date: - return self.__deserialize_date(data) - elif klass == datetime.datetime: - return self.__deserialize_datetime(data) - else: - return self.__deserialize_model(data, klass) - - def call_api(self, resource_path, method, - path_params=None, query_params=None, header_params=None, - body=None, post_params=None, files=None, - response_type=None, auth_settings=None, async_req=None, - _return_http_data_only=None, collection_formats=None, - _preload_content=True, _request_timeout=None, _host=None): - """Makes the HTTP request (synchronous) and returns deserialized data. - - To make an async_req request, set the async_req parameter. - - :param resource_path: Path to method endpoint. - :param method: Method to call. - :param path_params: Path parameters in the url. - :param query_params: Query parameters in the url. - :param header_params: Header parameters to be - placed in the request header. - :param body: Request body. - :param post_params dict: Request post form parameters, - for `application/x-www-form-urlencoded`, `multipart/form-data`. - :param auth_settings list: Auth Settings names for the request. - :param response: Response data type. - :param files dict: key -> filename, value -> filepath, - for `multipart/form-data`. 
- :param async_req bool: execute request asynchronously - :param _return_http_data_only: response data without head status code - and headers - :param collection_formats: dict of collection formats for path, query, - header, and post parameters. - :param _preload_content: if False, the urllib3.HTTPResponse object will - be returned without reading/decoding response - data. Default is True. - :param _request_timeout: timeout setting for this request. If one - number provided, it will be total request - timeout. It can also be a pair (tuple) of - (connection, read) timeouts. - :return: - If async_req parameter is True, - the request will be called asynchronously. - The method will return the request thread. - If parameter async_req is False or missing, - then the method will return the response directly. - """ - if not async_req: - return self.__call_api(resource_path, method, - path_params, query_params, header_params, - body, post_params, files, - response_type, auth_settings, - _return_http_data_only, collection_formats, - _preload_content, _request_timeout, _host) - - return self.pool.apply_async(self.__call_api, (resource_path, - method, path_params, - query_params, - header_params, body, - post_params, files, - response_type, - auth_settings, - _return_http_data_only, - collection_formats, - _preload_content, - _request_timeout, - _host)) - - def request(self, method, url, query_params=None, headers=None, - post_params=None, body=None, _preload_content=True, - _request_timeout=None): - """Makes the HTTP request using RESTClient.""" - if method == "GET": - return self.rest_client.GET(url, - query_params=query_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - headers=headers) - elif method == "HEAD": - return self.rest_client.HEAD(url, - query_params=query_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - headers=headers) - elif method == "OPTIONS": - return self.rest_client.OPTIONS(url, - 
query_params=query_params, - headers=headers, - _preload_content=_preload_content, - _request_timeout=_request_timeout) - elif method == "POST": - return self.rest_client.POST(url, - query_params=query_params, - headers=headers, - post_params=post_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) - elif method == "PUT": - return self.rest_client.PUT(url, - query_params=query_params, - headers=headers, - post_params=post_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) - elif method == "PATCH": - return self.rest_client.PATCH(url, - query_params=query_params, - headers=headers, - post_params=post_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) - elif method == "DELETE": - return self.rest_client.DELETE(url, - query_params=query_params, - headers=headers, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) - else: - raise ApiValueError( - "http method must be `GET`, `HEAD`, `OPTIONS`," - " `POST`, `PATCH`, `PUT` or `DELETE`." - ) - - def parameters_to_tuples(self, params, collection_formats): - """Get parameters as list of tuples, formatting collections. 
- - :param params: Parameters as dict or list of two-tuples - :param dict collection_formats: Parameter collection formats - :return: Parameters as list of tuples, collections formatted - """ - new_params = [] - if collection_formats is None: - collection_formats = {} - for k, v in six.iteritems(params) if isinstance(params, dict) else params: # noqa: E501 - if k in collection_formats: - collection_format = collection_formats[k] - if collection_format == 'multi': - new_params.extend((k, value) for value in v) - else: - if collection_format == 'ssv': - delimiter = ' ' - elif collection_format == 'tsv': - delimiter = '\t' - elif collection_format == 'pipes': - delimiter = '|' - else: # csv is the default - delimiter = ',' - new_params.append( - (k, delimiter.join(str(value) for value in v))) - else: - new_params.append((k, v)) - return new_params - - def files_parameters(self, files=None): - """Builds form parameters. - - :param files: File parameters. - :return: Form parameters with files. - """ - params = [] - - if files: - for k, v in six.iteritems(files): - if not v: - continue - file_names = v if type(v) is list else [v] - for n in file_names: - with open(n, 'rb') as f: - filename = os.path.basename(f.name) - filedata = f.read() - mimetype = (mimetypes.guess_type(filename)[0] or - 'application/octet-stream') - params.append( - tuple([k, tuple([filename, filedata, mimetype])])) - - return params - - def select_header_accept(self, accepts): - """Returns `Accept` based on an array of accepts provided. - - :param accepts: List of headers. - :return: Accept (e.g. application/json). - """ - if not accepts: - return - - accepts = [x.lower() for x in accepts] - - if 'application/json' in accepts: - return 'application/json' - else: - return ', '.join(accepts) - - def select_header_content_type(self, content_types): - """Returns `Content-Type` based on an array of content_types provided. - - :param content_types: List of content-types. - :return: Content-Type (e.g. 
application/json). - """ - if not content_types: - return 'application/json' - - content_types = [x.lower() for x in content_types] - - if 'application/json' in content_types or '*/*' in content_types: - return 'application/json' - else: - return content_types[0] - - def update_params_for_auth(self, headers, querys, auth_settings): - """Updates header and query params based on authentication setting. - - :param headers: Header parameters dict to be updated. - :param querys: Query parameters tuple list to be updated. - :param auth_settings: Authentication setting identifiers list. - """ - if not auth_settings: - return - - for auth in auth_settings: - auth_setting = self.configuration.auth_settings().get(auth) - if auth_setting: - if auth_setting['in'] == 'cookie': - headers['Cookie'] = auth_setting['value'] - elif auth_setting['in'] == 'header': - headers[auth_setting['key']] = auth_setting['value'] - elif auth_setting['in'] == 'query': - querys.append((auth_setting['key'], auth_setting['value'])) - else: - raise ApiValueError( - 'Authentication token must be in `query` or `header`' - ) - - def __deserialize_file(self, response): - """Deserializes body to file - - Saves response body into a file in a temporary folder, - using the filename from the `Content-Disposition` header if provided. - - :param response: RESTResponse. - :return: file path. - """ - fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path) - os.close(fd) - os.remove(path) - - content_disposition = response.getheader("Content-Disposition") - if content_disposition: - filename = re.search(r'filename=[\'"]?([^\'"\s]+)[\'"]?', - content_disposition).group(1) - path = os.path.join(os.path.dirname(path), filename) - - with open(path, "wb") as f: - f.write(response.data) - - return path - - def __deserialize_primitive(self, data, klass): - """Deserializes string to primitive type. - - :param data: str. - :param klass: class literal. - - :return: int, long, float, str, bool. 
- """ - try: - return klass(data) - except UnicodeEncodeError: - return six.text_type(data) - except TypeError: - return data - - def __deserialize_object(self, value): - """Return an original value. - - :return: object. - """ - return value - - def __deserialize_date(self, string): - """Deserializes string to date. - - :param string: str. - :return: date. - """ - try: - return parse(string).date() - except ImportError: - return string - except ValueError: - raise rest.ApiException( - status=0, - reason="Failed to parse `{0}` as date object".format(string) - ) - - def __deserialize_datetime(self, string): - """Deserializes string to datetime. - - The string should be in iso8601 datetime format. - - :param string: str. - :return: datetime. - """ - try: - return parse(string) - except ImportError: - return string - except ValueError: - raise rest.ApiException( - status=0, - reason=( - "Failed to parse `{0}` as datetime object" - .format(string) - ) - ) - - def __deserialize_model(self, data, klass): - """Deserializes list or dict to model. - - :param data: dict, list. - :param klass: class literal. - :return: model object. 
- """ - has_discriminator = False - if (hasattr(klass, 'get_real_child_model') - and klass.discriminator_value_class_map): - has_discriminator = True - - if not klass.openapi_types and has_discriminator is False: - return data - - kwargs = {} - if (data is not None and - klass.openapi_types is not None and - isinstance(data, (list, dict))): - for attr, attr_type in six.iteritems(klass.openapi_types): - if klass.attribute_map[attr] in data: - value = data[klass.attribute_map[attr]] - kwargs[attr] = self.__deserialize(value, attr_type) - - instance = klass(**kwargs) - - if has_discriminator: - klass_name = instance.get_real_child_model(data) - if klass_name: - instance = self.__deserialize(data, klass_name) - return instance diff --git a/sdk/python/kubeflow/training/configuration.py b/sdk/python/kubeflow/training/configuration.py deleted file mode 100644 index 123d843fef..0000000000 --- a/sdk/python/kubeflow/training/configuration.py +++ /dev/null @@ -1,376 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -from __future__ import absolute_import - -import copy -import logging -import multiprocessing -import sys -import urllib3 - -import six -from six.moves import http_client as httplib - - -class Configuration(object): - """NOTE: This class is auto generated by OpenAPI Generator - - Ref: https://openapi-generator.tech - Do not edit the class manually. - - :param host: Base url - :param api_key: Dict to store API key(s). - Each entry in the dict specifies an API key. - The dict key is the name of the security scheme in the OAS specification. - The dict value is the API key secret. - :param api_key_prefix: Dict to store API prefix (e.g. Bearer) - The dict key is the name of the security scheme in the OAS specification. - The dict value is an API key prefix when generating the auth data. 
- :param username: Username for HTTP basic authentication - :param password: Password for HTTP basic authentication - :param discard_unknown_keys: Boolean value indicating whether to discard - unknown properties. A server may send a response that includes additional - properties that are not known by the client in the following scenarios: - 1. The OpenAPI document is incomplete, i.e. it does not match the server - implementation. - 2. The client was generated using an older version of the OpenAPI document - and the server has been upgraded since then. - If a schema in the OpenAPI document defines the additionalProperties attribute, - then all undeclared properties received by the server are injected into the - additional properties map. In that case, there are undeclared properties, and - nothing to discard. - - """ - - _default = None - - def __init__(self, host="http://localhost", - api_key=None, api_key_prefix=None, - username=None, password=None, - discard_unknown_keys=False, - ): - """Constructor - """ - self.host = host - """Default Base url - """ - self.temp_folder_path = None - """Temp file folder for downloading files - """ - # Authentication Settings - self.api_key = {} - if api_key: - self.api_key = api_key - """dict to store API key(s) - """ - self.api_key_prefix = {} - if api_key_prefix: - self.api_key_prefix = api_key_prefix - """dict to store API prefix (e.g. 
Bearer) - """ - self.refresh_api_key_hook = None - """function hook to refresh API key if expired - """ - self.username = username - """Username for HTTP basic authentication - """ - self.password = password - """Password for HTTP basic authentication - """ - self.discard_unknown_keys = discard_unknown_keys - self.logger = {} - """Logging Settings - """ - self.logger["package_logger"] = logging.getLogger("kubeflow.training") - self.logger["urllib3_logger"] = logging.getLogger("urllib3") - self.logger_format = '%(asctime)s %(levelname)s %(message)s' - """Log format - """ - self.logger_stream_handler = None - """Log stream handler - """ - self.logger_file_handler = None - """Log file handler - """ - self.logger_file = None - """Debug file location - """ - self.debug = False - """Debug switch - """ - - self.verify_ssl = True - """SSL/TLS verification - Set this to false to skip verifying SSL certificate when calling API - from https server. - """ - self.ssl_ca_cert = None - """Set this to customize the certificate file to verify the peer. - """ - self.cert_file = None - """client certificate file - """ - self.key_file = None - """client key file - """ - self.assert_hostname = None - """Set this to True/False to enable/disable SSL hostname verification. - """ - - self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 - """urllib3 connection pool's maximum number of connections saved - per pool. urllib3 uses 1 connection as default value, but this is - not the best value when you are making a lot of possibly parallel - requests to the same host, which is often the case here. - cpu_count * 5 is used as default value to increase performance. 
- """ - - self.proxy = None - """Proxy URL - """ - self.proxy_headers = None - """Proxy headers - """ - self.safe_chars_for_path_param = '' - """Safe chars for path_param - """ - self.retries = None - """Adding retries to override urllib3 default value 3 - """ - # Disable client side validation - self.client_side_validation = True - - def __deepcopy__(self, memo): - cls = self.__class__ - result = cls.__new__(cls) - memo[id(self)] = result - for k, v in self.__dict__.items(): - if k not in ('logger', 'logger_file_handler'): - setattr(result, k, copy.deepcopy(v, memo)) - # shallow copy of loggers - result.logger = copy.copy(self.logger) - # use setters to configure loggers - result.logger_file = self.logger_file - result.debug = self.debug - return result - - def __setattr__(self, name, value): - object.__setattr__(self, name, value) - - @classmethod - def set_default(cls, default): - """Set default instance of configuration. - - It stores default configuration, which can be - returned by get_default_copy method. - - :param default: object of Configuration - """ - cls._default = copy.deepcopy(default) - - @classmethod - def get_default_copy(cls): - """Return new instance of configuration. - - This method returns newly created, based on default constructor, - object of Configuration class or returns a copy of default - configuration passed by the set_default method. - - :return: The configuration object. - """ - if cls._default is not None: - return copy.deepcopy(cls._default) - return Configuration() - - @property - def logger_file(self): - """The logger file. - - If the logger_file is None, then add stream handler and remove file - handler. Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - return self.__logger_file - - @logger_file.setter - def logger_file(self, value): - """The logger file. - - If the logger_file is None, then add stream handler and remove file - handler. 
Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - self.__logger_file = value - if self.__logger_file: - # If set logging file, - # then add file handler and remove stream handler. - self.logger_file_handler = logging.FileHandler(self.__logger_file) - self.logger_file_handler.setFormatter(self.logger_formatter) - for _, logger in six.iteritems(self.logger): - logger.addHandler(self.logger_file_handler) - - @property - def debug(self): - """Debug status - - :param value: The debug status, True or False. - :type: bool - """ - return self.__debug - - @debug.setter - def debug(self, value): - """Debug status - - :param value: The debug status, True or False. - :type: bool - """ - self.__debug = value - if self.__debug: - # if debug status is True, turn on debug logging - for _, logger in six.iteritems(self.logger): - logger.setLevel(logging.DEBUG) - # turn on httplib debug - httplib.HTTPConnection.debuglevel = 1 - else: - # if debug status is False, turn off debug logging, - # setting log level to default `logging.WARNING` - for _, logger in six.iteritems(self.logger): - logger.setLevel(logging.WARNING) - # turn off httplib debug - httplib.HTTPConnection.debuglevel = 0 - - @property - def logger_format(self): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. - :type: str - """ - return self.__logger_format - - @logger_format.setter - def logger_format(self, value): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. - :type: str - """ - self.__logger_format = value - self.logger_formatter = logging.Formatter(self.__logger_format) - - def get_api_key_with_prefix(self, identifier): - """Gets API key (with prefix if set). - - :param identifier: The identifier of apiKey. - :return: The token for api key authentication. 
- """ - if self.refresh_api_key_hook is not None: - self.refresh_api_key_hook(self) - key = self.api_key.get(identifier) - if key: - prefix = self.api_key_prefix.get(identifier) - if prefix: - return "%s %s" % (prefix, key) - else: - return key - - def get_basic_auth_token(self): - """Gets HTTP basic authentication header (string). - - :return: The token for basic HTTP authentication. - """ - username = "" - if self.username is not None: - username = self.username - password = "" - if self.password is not None: - password = self.password - return urllib3.util.make_headers( - basic_auth=username + ':' + password - ).get('authorization') - - def auth_settings(self): - """Gets Auth Settings dict for api client. - - :return: The Auth Settings information dict. - """ - auth = {} - return auth - - def to_debug_report(self): - """Gets the essential information for debugging. - - :return: The report for debugging. - """ - return "Python SDK Debug Report:\n"\ - "OS: {env}\n"\ - "Python Version: {pyversion}\n"\ - "Version of the API: v1.7.0\n"\ - "SDK Package Version: 1.7.0".\ - format(env=sys.platform, pyversion=sys.version) - - def get_host_settings(self): - """Gets an array of host settings - - :return: An array of host settings - """ - return [ - { - 'url': "/", - 'description': "No description provided", - } - ] - - def get_host_from_settings(self, index, variables=None): - """Gets host URL based on the index and variables - :param index: array index of the host settings - :param variables: hash of variable and the corresponding value - :return: URL based on host settings - """ - variables = {} if variables is None else variables - servers = self.get_host_settings() - - try: - server = servers[index] - except IndexError: - raise ValueError( - "Invalid index {0} when selecting the host settings. 
" - "Must be less than {1}".format(index, len(servers))) - - url = server['url'] - - # go through variables and replace placeholders - for variable_name, variable in server['variables'].items(): - used_value = variables.get( - variable_name, variable['default_value']) - - if 'enum_values' in variable \ - and used_value not in variable['enum_values']: - raise ValueError( - "The variable `{0}` in the host URL has invalid value " - "{1}. Must be {2}.".format( - variable_name, variables[variable_name], - variable['enum_values'])) - - url = url.replace("{" + variable_name + "}", used_value) - - return url diff --git a/sdk/python/kubeflow/training/constants/__init__.py b/sdk/python/kubeflow/training/constants/__init__.py deleted file mode 100644 index ede60a09ab..0000000000 --- a/sdk/python/kubeflow/training/constants/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/sdk/python/kubeflow/training/constants/constants.py b/sdk/python/kubeflow/training/constants/constants.py deleted file mode 100644 index 2a5415ea26..0000000000 --- a/sdk/python/kubeflow/training/constants/constants.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2021 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from typing import Union - -from kubeflow.storage_initializer.constants import INIT_CONTAINER_MOUNT_PATH -from kubeflow.training import models - -# How long to wait in seconds for requests to the Kubernetes API Server. -DEFAULT_TIMEOUT = 120 - -# The default PIP index URL to download Python packages. -DEFAULT_PIP_INDEX_URL = "https://pypi.org/simple" - -# The default namespace in case namespace not define explicitly -DEFAULT_NAMESPACE = "default" - -# Annotation to disable Istio sidecar. -ISTIO_SIDECAR_INJECTION = "sidecar.istio.io/inject" - -# Common constants. -GROUP = "kubeflow.org" -VERSION = "v1" -API_VERSION = f"{GROUP}/{VERSION}" - -# Kind for pod. -POD_KIND = "Pod" - -# Pending status for pod phase. -POD_PHASE_PENDING = "Pending" - - -# Training Job conditions. -JOB_CONDITION_CREATED = "Created" -JOB_CONDITION_RUNNING = "Running" -JOB_CONDITION_RESTARTING = "Restarting" -JOB_CONDITION_SUCCEEDED = "Succeeded" -JOB_CONDITION_FAILED = "Failed" -JOB_CONDITIONS = { - JOB_CONDITION_CREATED, - JOB_CONDITION_RUNNING, - JOB_CONDITION_RESTARTING, - JOB_CONDITION_SUCCEEDED, - JOB_CONDITION_FAILED, -} -# True means that Training Job is in this condition. -CONDITION_STATUS_TRUE = "True" - -# Job Label Names -JOB_NAME_LABEL = "training.kubeflow.org/job-name" -JOB_ROLE_LABEL = "training.kubeflow.org/job-role" -JOB_ROLE_MASTER = "master" -REPLICA_TYPE_LABEL = "training.kubeflow.org/replica-type" -REPLICA_INDEX_LABEL = "training.kubeflow.org/replica-index" - -# Various replica types. 
-REPLICA_TYPE_CHIEF = "Chief" -REPLICA_TYPE_PS = "PS" -REPLICA_TYPE_MASTER = "Master" -REPLICA_TYPE_WORKER = "Worker" -REPLICA_TYPE_SCHEDULER = "Scheduler" -REPLICA_TYPE_SERVER = "Server" -REPLICA_TYPE_LAUNCHER = "Launcher" - -# Constants for Train API. -STORAGE_INITIALIZER = "storage-initializer" -# The default value for dataset and model storage PVC. -PVC_DEFAULT_SIZE = "10Gi" -# The default value for PVC access modes. -PVC_DEFAULT_ACCESS_MODES = ["ReadWriteOnce", "ReadOnlyMany"] - - -# TODO (andreyvelich): We should add image tag for Storage Initializer and Trainer. -STORAGE_INITIALIZER_IMAGE = os.getenv( - "STORAGE_INITIALIZER_IMAGE", "docker.io/kubeflow/storage-initializer" -) - -STORAGE_INITIALIZER_VOLUME_MOUNT = models.V1VolumeMount( - name=STORAGE_INITIALIZER, - mount_path=INIT_CONTAINER_MOUNT_PATH, -) - -TRAINER_TRANSFORMER_IMAGE = os.getenv( - "TRAINER_TRANSFORMER_IMAGE", "docker.io/kubeflow/trainer-huggingface" -) - -# TFJob constants. -TFJOB_KIND = "TFJob" -TFJOB_MODEL = "KubeflowOrgV1TFJob" -TFJOB_PLURAL = "tfjobs" -TFJOB_CONTAINER = "tensorflow" -TFJOB_REPLICA_TYPES = ( - REPLICA_TYPE_PS.lower(), - REPLICA_TYPE_CHIEF.lower(), - REPLICA_TYPE_WORKER.lower(), -) - -TFJOB_BASE_IMAGE = "docker.io/tensorflow/tensorflow:2.9.1" -TFJOB_BASE_IMAGE_GPU = "docker.io/tensorflow/tensorflow:2.9.1-gpu" - -# PyTorchJob constants -PYTORCHJOB_KIND = "PyTorchJob" -PYTORCHJOB_MODEL = "KubeflowOrgV1PyTorchJob" -PYTORCHJOB_PLURAL = "pytorchjobs" -PYTORCHJOB_CONTAINER = "pytorch" -PYTORCHJOB_REPLICA_TYPES = (REPLICA_TYPE_MASTER.lower(), REPLICA_TYPE_WORKER.lower()) -PYTORCHJOB_BASE_IMAGE = "docker.io/pytorch/pytorch:2.1.2-cuda11.8-cudnn8-runtime" - -ENTRYPOINT_TORCH = "torchrun" -ENTRYPOINT_PYTHON = "python -u" -DEFAULT_COMMAND = ["bash", "-c"] - -# XGBoostJob constants -XGBOOSTJOB_KIND = "XGBoostJob" -XGBOOSTJOB_MODEL = "KubeflowOrgV1XGBoostJob" -XGBOOSTJOB_PLURAL = "xgboostjobs" -XGBOOSTJOB_CONTAINER = "xgboost" -XGBOOSTJOB_REPLICA_TYPES = (REPLICA_TYPE_MASTER.lower(), 
REPLICA_TYPE_WORKER.lower()) - -# MPIJob constants -MPIJOB_KIND = "MPIJob" -MPIJOB_MODEL = "KubeflowOrgV1MPIJob" -MPIJOB_PLURAL = "mpijobs" -MPIJOB_CONTAINER = "mpi" -MPIJOB_REPLICA_TYPES = (REPLICA_TYPE_LAUNCHER.lower(), REPLICA_TYPE_WORKER.lower()) - -# PaddleJob constants -PADDLEJOB_KIND = "PaddleJob" -PADDLEJOB_MODEL = "KubeflowOrgV1PaddleJob" -PADDLEJOB_PLURAL = "paddlejobs" -PADDLEJOB_CONTAINER = "paddle" -PADDLEJOB_REPLICA_TYPES = (REPLICA_TYPE_MASTER.lower(), REPLICA_TYPE_WORKER.lower()) - -PADDLEJOB_BASE_IMAGE = ( - "docker.io/paddlepaddle/paddle:2.4.0rc0-gpu-cuda11.2-cudnn8.1-trt8.0" -) - -# JAXJob constants -JAXJOB_KIND = "JAXJob" -JAXJOB_MODEL = "KubeflowOrgV1JAXJob" -JAXJOB_PLURAL = "jaxjobs" -JAXJOB_CONTAINER = "jax" -JAXJOB_REPLICA_TYPES = REPLICA_TYPE_WORKER.lower() -JAXJOB_BASE_IMAGE = "docker.io/kubeflow/jaxjob-dist-spmd-mnist:latest" - -# Dictionary to get plural, model, and container for each Job kind. -JOB_PARAMETERS = { - TFJOB_KIND: { - "model": TFJOB_MODEL, - "plural": TFJOB_PLURAL, - "container": TFJOB_CONTAINER, - "base_image": TFJOB_BASE_IMAGE, - }, - PYTORCHJOB_KIND: { - "model": PYTORCHJOB_MODEL, - "plural": PYTORCHJOB_PLURAL, - "container": PYTORCHJOB_CONTAINER, - "base_image": PYTORCHJOB_BASE_IMAGE, - }, - XGBOOSTJOB_KIND: { - "model": XGBOOSTJOB_MODEL, - "plural": XGBOOSTJOB_PLURAL, - "container": XGBOOSTJOB_CONTAINER, - "base_image": "TODO", - }, - MPIJOB_KIND: { - "model": MPIJOB_MODEL, - "plural": MPIJOB_PLURAL, - "container": MPIJOB_CONTAINER, - "base_image": "TODO", - }, - PADDLEJOB_KIND: { - "model": PADDLEJOB_MODEL, - "plural": PADDLEJOB_PLURAL, - "container": PADDLEJOB_CONTAINER, - "base_image": PADDLEJOB_BASE_IMAGE, - }, - JAXJOB_KIND: { - "model": JAXJOB_MODEL, - "plural": JAXJOB_PLURAL, - "container": JAXJOB_CONTAINER, - "base_image": "JAXJOB_BASE_IMAGE", - }, -} - -# Tuple of all Job models. -JOB_MODELS = tuple([d["model"] for d in list(JOB_PARAMETERS.values())]) - -# Union type of all Job models. 
-JOB_MODELS_TYPE = Union[ - models.KubeflowOrgV1TFJob, - models.KubeflowOrgV1PyTorchJob, - models.KubeflowOrgV1XGBoostJob, - models.KubeflowOrgV1MPIJob, - models.KubeflowOrgV1PaddleJob, - models.KubeflowOrgV1JAXJob, -] diff --git a/sdk/python/kubeflow/training/exceptions.py b/sdk/python/kubeflow/training/exceptions.py deleted file mode 100644 index b3377179b2..0000000000 --- a/sdk/python/kubeflow/training/exceptions.py +++ /dev/null @@ -1,120 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import six - - -class OpenApiException(Exception): - """The base exception class for all OpenAPIExceptions""" - - -class ApiTypeError(OpenApiException, TypeError): - def __init__(self, msg, path_to_item=None, valid_classes=None, - key_type=None): - """ Raises an exception for TypeErrors - - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list): a list of keys an indices to get to the - current_item - None if unset - valid_classes (tuple): the primitive classes that current item - should be an instance of - None if unset - key_type (bool): False if our value is a value in a dict - True if it is a key in a dict - False if our item is an item in a list - None if unset - """ - self.path_to_item = path_to_item - self.valid_classes = valid_classes - self.key_type = key_type - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(ApiTypeError, self).__init__(full_msg) - - -class ApiValueError(OpenApiException, ValueError): - def __init__(self, msg, path_to_item=None): - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list) the path to the exception in the - received_data dict. 
None if unset - """ - - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(ApiValueError, self).__init__(full_msg) - - -class ApiKeyError(OpenApiException, KeyError): - def __init__(self, msg, path_to_item=None): - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (None/list) the path to the exception in the - received_data dict - """ - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(ApiKeyError, self).__init__(full_msg) - - -class ApiException(OpenApiException): - - def __init__(self, status=None, reason=None, http_resp=None): - if http_resp: - self.status = http_resp.status - self.reason = http_resp.reason - self.body = http_resp.data - self.headers = http_resp.getheaders() - else: - self.status = status - self.reason = reason - self.body = None - self.headers = None - - def __str__(self): - """Custom error messages for exception""" - error_message = "({0})\n"\ - "Reason: {1}\n".format(self.status, self.reason) - if self.headers: - error_message += "HTTP response headers: {0}\n".format( - self.headers) - - if self.body: - error_message += "HTTP response body: {0}\n".format(self.body) - - return error_message - - -def render_path(path_to_item): - """Returns a string representation of a path""" - result = "" - for pth in path_to_item: - if isinstance(pth, six.integer_types): - result += "[{0}]".format(pth) - else: - result += "['{0}']".format(pth) - return result diff --git a/sdk/python/kubeflow/training/models/__init__.py b/sdk/python/kubeflow/training/models/__init__.py deleted file mode 100644 index 77e97edfd3..0000000000 --- a/sdk/python/kubeflow/training/models/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -# coding: utf-8 - -# flake8: noqa -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI 
document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -from __future__ import absolute_import - -# Import Kubernetes models. -from kubernetes.client import * - -# import models into model package -from kubeflow.training.models.kubeflow_org_v1_elastic_policy import KubeflowOrgV1ElasticPolicy -from kubeflow.training.models.kubeflow_org_v1_jax_job import KubeflowOrgV1JAXJob -from kubeflow.training.models.kubeflow_org_v1_jax_job_list import KubeflowOrgV1JAXJobList -from kubeflow.training.models.kubeflow_org_v1_jax_job_spec import KubeflowOrgV1JAXJobSpec -from kubeflow.training.models.kubeflow_org_v1_job_condition import KubeflowOrgV1JobCondition -from kubeflow.training.models.kubeflow_org_v1_job_status import KubeflowOrgV1JobStatus -from kubeflow.training.models.kubeflow_org_v1_mpi_job import KubeflowOrgV1MPIJob -from kubeflow.training.models.kubeflow_org_v1_mpi_job_list import KubeflowOrgV1MPIJobList -from kubeflow.training.models.kubeflow_org_v1_mpi_job_spec import KubeflowOrgV1MPIJobSpec -from kubeflow.training.models.kubeflow_org_v1_paddle_elastic_policy import KubeflowOrgV1PaddleElasticPolicy -from kubeflow.training.models.kubeflow_org_v1_paddle_job import KubeflowOrgV1PaddleJob -from kubeflow.training.models.kubeflow_org_v1_paddle_job_list import KubeflowOrgV1PaddleJobList -from kubeflow.training.models.kubeflow_org_v1_paddle_job_spec import KubeflowOrgV1PaddleJobSpec -from kubeflow.training.models.kubeflow_org_v1_py_torch_job import KubeflowOrgV1PyTorchJob -from kubeflow.training.models.kubeflow_org_v1_py_torch_job_list import KubeflowOrgV1PyTorchJobList -from kubeflow.training.models.kubeflow_org_v1_py_torch_job_spec import KubeflowOrgV1PyTorchJobSpec -from kubeflow.training.models.kubeflow_org_v1_rdzv_conf import KubeflowOrgV1RDZVConf -from kubeflow.training.models.kubeflow_org_v1_replica_spec import KubeflowOrgV1ReplicaSpec -from kubeflow.training.models.kubeflow_org_v1_replica_status import KubeflowOrgV1ReplicaStatus -from 
kubeflow.training.models.kubeflow_org_v1_run_policy import KubeflowOrgV1RunPolicy -from kubeflow.training.models.kubeflow_org_v1_scheduling_policy import KubeflowOrgV1SchedulingPolicy -from kubeflow.training.models.kubeflow_org_v1_tf_job import KubeflowOrgV1TFJob -from kubeflow.training.models.kubeflow_org_v1_tf_job_list import KubeflowOrgV1TFJobList -from kubeflow.training.models.kubeflow_org_v1_tf_job_spec import KubeflowOrgV1TFJobSpec -from kubeflow.training.models.kubeflow_org_v1_xg_boost_job import KubeflowOrgV1XGBoostJob -from kubeflow.training.models.kubeflow_org_v1_xg_boost_job_list import KubeflowOrgV1XGBoostJobList -from kubeflow.training.models.kubeflow_org_v1_xg_boost_job_spec import KubeflowOrgV1XGBoostJobSpec diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_elastic_policy.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_elastic_policy.py deleted file mode 100644 index 8f8b49dc6a..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_elastic_policy.py +++ /dev/null @@ -1,392 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1ElasticPolicy(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. 
- """ - openapi_types = { - 'max_replicas': 'int', - 'max_restarts': 'int', - 'metrics': 'list[K8sIoApiAutoscalingV2MetricSpec]', - 'min_replicas': 'int', - 'n_proc_per_node': 'int', - 'rdzv_backend': 'str', - 'rdzv_conf': 'list[KubeflowOrgV1RDZVConf]', - 'rdzv_host': 'str', - 'rdzv_id': 'str', - 'rdzv_port': 'int', - 'standalone': 'bool' - } - - attribute_map = { - 'max_replicas': 'maxReplicas', - 'max_restarts': 'maxRestarts', - 'metrics': 'metrics', - 'min_replicas': 'minReplicas', - 'n_proc_per_node': 'nProcPerNode', - 'rdzv_backend': 'rdzvBackend', - 'rdzv_conf': 'rdzvConf', - 'rdzv_host': 'rdzvHost', - 'rdzv_id': 'rdzvId', - 'rdzv_port': 'rdzvPort', - 'standalone': 'standalone' - } - - def __init__(self, max_replicas=None, max_restarts=None, metrics=None, min_replicas=None, n_proc_per_node=None, rdzv_backend=None, rdzv_conf=None, rdzv_host=None, rdzv_id=None, rdzv_port=None, standalone=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1ElasticPolicy - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._max_replicas = None - self._max_restarts = None - self._metrics = None - self._min_replicas = None - self._n_proc_per_node = None - self._rdzv_backend = None - self._rdzv_conf = None - self._rdzv_host = None - self._rdzv_id = None - self._rdzv_port = None - self._standalone = None - self.discriminator = None - - if max_replicas is not None: - self.max_replicas = max_replicas - if max_restarts is not None: - self.max_restarts = max_restarts - if metrics is not None: - self.metrics = metrics - if min_replicas is not None: - self.min_replicas = min_replicas - if n_proc_per_node is not None: - self.n_proc_per_node = n_proc_per_node - if rdzv_backend is not None: - self.rdzv_backend = rdzv_backend - if rdzv_conf is not None: - self.rdzv_conf = rdzv_conf - if rdzv_host is not None: - self.rdzv_host = 
rdzv_host - if rdzv_id is not None: - self.rdzv_id = rdzv_id - if rdzv_port is not None: - self.rdzv_port = rdzv_port - if standalone is not None: - self.standalone = standalone - - @property - def max_replicas(self): - """Gets the max_replicas of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. # noqa: E501 - - :return: The max_replicas of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: int - """ - return self._max_replicas - - @max_replicas.setter - def max_replicas(self, max_replicas): - """Sets the max_replicas of this KubeflowOrgV1ElasticPolicy. - - upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. # noqa: E501 - - :param max_replicas: The max_replicas of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: int - """ - - self._max_replicas = max_replicas - - @property - def max_restarts(self): - """Gets the max_restarts of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - - :return: The max_restarts of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: int - """ - return self._max_restarts - - @max_restarts.setter - def max_restarts(self, max_restarts): - """Sets the max_restarts of this KubeflowOrgV1ElasticPolicy. - - - :param max_restarts: The max_restarts of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: int - """ - - self._max_restarts = max_restarts - - @property - def metrics(self): - """Gets the metrics of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. 
See the individual metric source types for more information about how each type of metric must respond. If not set, the HPA will not be created. # noqa: E501 - - :return: The metrics of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: list[K8sIoApiAutoscalingV2MetricSpec] - """ - return self._metrics - - @metrics.setter - def metrics(self, metrics): - """Sets the metrics of this KubeflowOrgV1ElasticPolicy. - - Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. See the individual metric source types for more information about how each type of metric must respond. If not set, the HPA will not be created. # noqa: E501 - - :param metrics: The metrics of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: list[K8sIoApiAutoscalingV2MetricSpec] - """ - - self._metrics = metrics - - @property - def min_replicas(self): - """Gets the min_replicas of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - minReplicas is the lower limit for the number of replicas to which the training job can scale down. It defaults to null. # noqa: E501 - - :return: The min_replicas of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: int - """ - return self._min_replicas - - @min_replicas.setter - def min_replicas(self, min_replicas): - """Sets the min_replicas of this KubeflowOrgV1ElasticPolicy. - - minReplicas is the lower limit for the number of replicas to which the training job can scale down. It defaults to null. # noqa: E501 - - :param min_replicas: The min_replicas of this KubeflowOrgV1ElasticPolicy. 
# noqa: E501 - :type: int - """ - - self._min_replicas = min_replicas - - @property - def n_proc_per_node(self): - """Gets the n_proc_per_node of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - Number of workers per node; supported values: [auto, cpu, gpu, int]. Deprecated: This API is deprecated in v1.7+ Use .spec.nprocPerNode instead. # noqa: E501 - - :return: The n_proc_per_node of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: int - """ - return self._n_proc_per_node - - @n_proc_per_node.setter - def n_proc_per_node(self, n_proc_per_node): - """Sets the n_proc_per_node of this KubeflowOrgV1ElasticPolicy. - - Number of workers per node; supported values: [auto, cpu, gpu, int]. Deprecated: This API is deprecated in v1.7+ Use .spec.nprocPerNode instead. # noqa: E501 - - :param n_proc_per_node: The n_proc_per_node of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: int - """ - - self._n_proc_per_node = n_proc_per_node - - @property - def rdzv_backend(self): - """Gets the rdzv_backend of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - - :return: The rdzv_backend of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: str - """ - return self._rdzv_backend - - @rdzv_backend.setter - def rdzv_backend(self, rdzv_backend): - """Sets the rdzv_backend of this KubeflowOrgV1ElasticPolicy. - - - :param rdzv_backend: The rdzv_backend of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: str - """ - - self._rdzv_backend = rdzv_backend - - @property - def rdzv_conf(self): - """Gets the rdzv_conf of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - RDZVConf contains additional rendezvous configuration (=,=,...). # noqa: E501 - - :return: The rdzv_conf of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: list[KubeflowOrgV1RDZVConf] - """ - return self._rdzv_conf - - @rdzv_conf.setter - def rdzv_conf(self, rdzv_conf): - """Sets the rdzv_conf of this KubeflowOrgV1ElasticPolicy. - - RDZVConf contains additional rendezvous configuration (=,=,...). 
# noqa: E501 - - :param rdzv_conf: The rdzv_conf of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: list[KubeflowOrgV1RDZVConf] - """ - - self._rdzv_conf = rdzv_conf - - @property - def rdzv_host(self): - """Gets the rdzv_host of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - - :return: The rdzv_host of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: str - """ - return self._rdzv_host - - @rdzv_host.setter - def rdzv_host(self, rdzv_host): - """Sets the rdzv_host of this KubeflowOrgV1ElasticPolicy. - - - :param rdzv_host: The rdzv_host of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: str - """ - - self._rdzv_host = rdzv_host - - @property - def rdzv_id(self): - """Gets the rdzv_id of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - - :return: The rdzv_id of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: str - """ - return self._rdzv_id - - @rdzv_id.setter - def rdzv_id(self, rdzv_id): - """Sets the rdzv_id of this KubeflowOrgV1ElasticPolicy. - - - :param rdzv_id: The rdzv_id of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: str - """ - - self._rdzv_id = rdzv_id - - @property - def rdzv_port(self): - """Gets the rdzv_port of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - - :return: The rdzv_port of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: int - """ - return self._rdzv_port - - @rdzv_port.setter - def rdzv_port(self, rdzv_port): - """Sets the rdzv_port of this KubeflowOrgV1ElasticPolicy. - - - :param rdzv_port: The rdzv_port of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: int - """ - - self._rdzv_port = rdzv_port - - @property - def standalone(self): - """Gets the standalone of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - - Start a local standalone rendezvous backend that is represented by a C10d TCP store on port 29400. Useful when launching single-node, multi-worker job. If specified --rdzv_backend, --rdzv_endpoint, --rdzv_id are auto-assigned; any explicitly set values are ignored. 
# noqa: E501 - - :return: The standalone of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :rtype: bool - """ - return self._standalone - - @standalone.setter - def standalone(self, standalone): - """Sets the standalone of this KubeflowOrgV1ElasticPolicy. - - Start a local standalone rendezvous backend that is represented by a C10d TCP store on port 29400. Useful when launching single-node, multi-worker job. If specified --rdzv_backend, --rdzv_endpoint, --rdzv_id are auto-assigned; any explicitly set values are ignored. # noqa: E501 - - :param standalone: The standalone of this KubeflowOrgV1ElasticPolicy. # noqa: E501 - :type: bool - """ - - self._standalone = standalone - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1ElasticPolicy): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1ElasticPolicy): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job.py deleted file 
mode 100644 index 39b03a0daa..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1JAXJob(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'kind': 'str', - 'metadata': 'V1ObjectMeta', - 'spec': 'KubeflowOrgV1JAXJobSpec', - 'status': 'KubeflowOrgV1JobStatus' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'kind': 'kind', - 'metadata': 'metadata', - 'spec': 'spec', - 'status': 'status' - } - - def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1JAXJob - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._kind = None - self._metadata = None - self._spec = None - self._status = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - if spec is not None: - self.spec = spec - if status is not None: - self.status = status - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1JAXJob. 
# noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1JAXJob. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1JAXJob. - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1JAXJob. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1JAXJob. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1JAXJob. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1JAXJob. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1JAXJob. 
# noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1JAXJob. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1JAXJob. # noqa: E501 - :rtype: V1ObjectMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1JAXJob. - - - :param metadata: The metadata of this KubeflowOrgV1JAXJob. # noqa: E501 - :type: V1ObjectMeta - """ - - self._metadata = metadata - - @property - def spec(self): - """Gets the spec of this KubeflowOrgV1JAXJob. # noqa: E501 - - - :return: The spec of this KubeflowOrgV1JAXJob. # noqa: E501 - :rtype: KubeflowOrgV1JAXJobSpec - """ - return self._spec - - @spec.setter - def spec(self, spec): - """Sets the spec of this KubeflowOrgV1JAXJob. - - - :param spec: The spec of this KubeflowOrgV1JAXJob. # noqa: E501 - :type: KubeflowOrgV1JAXJobSpec - """ - - self._spec = spec - - @property - def status(self): - """Gets the status of this KubeflowOrgV1JAXJob. # noqa: E501 - - - :return: The status of this KubeflowOrgV1JAXJob. # noqa: E501 - :rtype: KubeflowOrgV1JobStatus - """ - return self._status - - @status.setter - def status(self, status): - """Sets the status of this KubeflowOrgV1JAXJob. - - - :param status: The status of this KubeflowOrgV1JAXJob. 
# noqa: E501 - :type: KubeflowOrgV1JobStatus - """ - - self._status = status - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1JAXJob): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1JAXJob): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job_list.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job_list.py deleted file mode 100644 index e90aa1a836..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job_list.py +++ /dev/null @@ -1,205 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1JAXJobList(object): - """NOTE: This class is auto generated by OpenAPI Generator. 
- Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'items': 'list[KubeflowOrgV1JAXJob]', - 'kind': 'str', - 'metadata': 'V1ListMeta' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'items': 'items', - 'kind': 'kind', - 'metadata': 'metadata' - } - - def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1JAXJobList - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._items = None - self._kind = None - self._metadata = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - self.items = items - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1JAXJobList. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1JAXJobList. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1JAXJobList. - - APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1JAXJobList. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def items(self): - """Gets the items of this KubeflowOrgV1JAXJobList. # noqa: E501 - - List of JAXJobs. # noqa: E501 - - :return: The items of this KubeflowOrgV1JAXJobList. # noqa: E501 - :rtype: list[KubeflowOrgV1JAXJob] - """ - return self._items - - @items.setter - def items(self, items): - """Sets the items of this KubeflowOrgV1JAXJobList. - - List of JAXJobs. # noqa: E501 - - :param items: The items of this KubeflowOrgV1JAXJobList. # noqa: E501 - :type: list[KubeflowOrgV1JAXJob] - """ - if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 - raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 - - self._items = items - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1JAXJobList. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1JAXJobList. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1JAXJobList. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1JAXJobList. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1JAXJobList. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1JAXJobList. # noqa: E501 - :rtype: V1ListMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1JAXJobList. - - - :param metadata: The metadata of this KubeflowOrgV1JAXJobList. # noqa: E501 - :type: V1ListMeta - """ - - self._metadata = metadata - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1JAXJobList): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1JAXJobList): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job_spec.py 
b/sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job_spec.py deleted file mode 100644 index 78d01acdbc..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_jax_job_spec.py +++ /dev/null @@ -1,150 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1JAXJobSpec(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'jax_replica_specs': 'dict(str, KubeflowOrgV1ReplicaSpec)', - 'run_policy': 'KubeflowOrgV1RunPolicy' - } - - attribute_map = { - 'jax_replica_specs': 'jaxReplicaSpecs', - 'run_policy': 'runPolicy' - } - - def __init__(self, jax_replica_specs=None, run_policy=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1JAXJobSpec - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._jax_replica_specs = None - self._run_policy = None - self.discriminator = None - - self.jax_replica_specs = jax_replica_specs - self.run_policy = run_policy - - @property - def jax_replica_specs(self): - """Gets the jax_replica_specs of this KubeflowOrgV1JAXJobSpec. # noqa: E501 - - A map of JAXReplicaType (type) to ReplicaSpec (value). Specifies the JAX cluster configuration. 
For example, { \"Worker\": JAXReplicaSpec, } # noqa: E501 - - :return: The jax_replica_specs of this KubeflowOrgV1JAXJobSpec. # noqa: E501 - :rtype: dict(str, KubeflowOrgV1ReplicaSpec) - """ - return self._jax_replica_specs - - @jax_replica_specs.setter - def jax_replica_specs(self, jax_replica_specs): - """Sets the jax_replica_specs of this KubeflowOrgV1JAXJobSpec. - - A map of JAXReplicaType (type) to ReplicaSpec (value). Specifies the JAX cluster configuration. For example, { \"Worker\": JAXReplicaSpec, } # noqa: E501 - - :param jax_replica_specs: The jax_replica_specs of this KubeflowOrgV1JAXJobSpec. # noqa: E501 - :type: dict(str, KubeflowOrgV1ReplicaSpec) - """ - if self.local_vars_configuration.client_side_validation and jax_replica_specs is None: # noqa: E501 - raise ValueError("Invalid value for `jax_replica_specs`, must not be `None`") # noqa: E501 - - self._jax_replica_specs = jax_replica_specs - - @property - def run_policy(self): - """Gets the run_policy of this KubeflowOrgV1JAXJobSpec. # noqa: E501 - - - :return: The run_policy of this KubeflowOrgV1JAXJobSpec. # noqa: E501 - :rtype: KubeflowOrgV1RunPolicy - """ - return self._run_policy - - @run_policy.setter - def run_policy(self, run_policy): - """Sets the run_policy of this KubeflowOrgV1JAXJobSpec. - - - :param run_policy: The run_policy of this KubeflowOrgV1JAXJobSpec. 
# noqa: E501 - :type: KubeflowOrgV1RunPolicy - """ - if self.local_vars_configuration.client_side_validation and run_policy is None: # noqa: E501 - raise ValueError("Invalid value for `run_policy`, must not be `None`") # noqa: E501 - - self._run_policy = run_policy - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1JAXJobSpec): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1JAXJobSpec): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_job_condition.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_job_condition.py deleted file mode 100644 index 2000f8ca2e..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_job_condition.py +++ /dev/null @@ -1,260 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from 
kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1JobCondition(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'last_transition_time': 'datetime', - 'last_update_time': 'datetime', - 'message': 'str', - 'reason': 'str', - 'status': 'str', - 'type': 'str' - } - - attribute_map = { - 'last_transition_time': 'lastTransitionTime', - 'last_update_time': 'lastUpdateTime', - 'message': 'message', - 'reason': 'reason', - 'status': 'status', - 'type': 'type' - } - - def __init__(self, last_transition_time=None, last_update_time=None, message=None, reason=None, status='', type='', local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1JobCondition - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._last_transition_time = None - self._last_update_time = None - self._message = None - self._reason = None - self._status = None - self._type = None - self.discriminator = None - - if last_transition_time is not None: - self.last_transition_time = last_transition_time - if last_update_time is not None: - self.last_update_time = last_update_time - if message is not None: - self.message = message - if reason is not None: - self.reason = reason - self.status = status - self.type = type - - @property - def last_transition_time(self): - """Gets the last_transition_time of this KubeflowOrgV1JobCondition. # noqa: E501 - - - :return: The last_transition_time of this KubeflowOrgV1JobCondition. 
# noqa: E501 - :rtype: datetime - """ - return self._last_transition_time - - @last_transition_time.setter - def last_transition_time(self, last_transition_time): - """Sets the last_transition_time of this KubeflowOrgV1JobCondition. - - - :param last_transition_time: The last_transition_time of this KubeflowOrgV1JobCondition. # noqa: E501 - :type: datetime - """ - - self._last_transition_time = last_transition_time - - @property - def last_update_time(self): - """Gets the last_update_time of this KubeflowOrgV1JobCondition. # noqa: E501 - - - :return: The last_update_time of this KubeflowOrgV1JobCondition. # noqa: E501 - :rtype: datetime - """ - return self._last_update_time - - @last_update_time.setter - def last_update_time(self, last_update_time): - """Sets the last_update_time of this KubeflowOrgV1JobCondition. - - - :param last_update_time: The last_update_time of this KubeflowOrgV1JobCondition. # noqa: E501 - :type: datetime - """ - - self._last_update_time = last_update_time - - @property - def message(self): - """Gets the message of this KubeflowOrgV1JobCondition. # noqa: E501 - - A human readable message indicating details about the transition. # noqa: E501 - - :return: The message of this KubeflowOrgV1JobCondition. # noqa: E501 - :rtype: str - """ - return self._message - - @message.setter - def message(self, message): - """Sets the message of this KubeflowOrgV1JobCondition. - - A human readable message indicating details about the transition. # noqa: E501 - - :param message: The message of this KubeflowOrgV1JobCondition. # noqa: E501 - :type: str - """ - - self._message = message - - @property - def reason(self): - """Gets the reason of this KubeflowOrgV1JobCondition. # noqa: E501 - - The reason for the condition's last transition. # noqa: E501 - - :return: The reason of this KubeflowOrgV1JobCondition. 
# noqa: E501 - :rtype: str - """ - return self._reason - - @reason.setter - def reason(self, reason): - """Sets the reason of this KubeflowOrgV1JobCondition. - - The reason for the condition's last transition. # noqa: E501 - - :param reason: The reason of this KubeflowOrgV1JobCondition. # noqa: E501 - :type: str - """ - - self._reason = reason - - @property - def status(self): - """Gets the status of this KubeflowOrgV1JobCondition. # noqa: E501 - - Status of the condition, one of True, False, Unknown. # noqa: E501 - - :return: The status of this KubeflowOrgV1JobCondition. # noqa: E501 - :rtype: str - """ - return self._status - - @status.setter - def status(self, status): - """Sets the status of this KubeflowOrgV1JobCondition. - - Status of the condition, one of True, False, Unknown. # noqa: E501 - - :param status: The status of this KubeflowOrgV1JobCondition. # noqa: E501 - :type: str - """ - if self.local_vars_configuration.client_side_validation and status is None: # noqa: E501 - raise ValueError("Invalid value for `status`, must not be `None`") # noqa: E501 - - self._status = status - - @property - def type(self): - """Gets the type of this KubeflowOrgV1JobCondition. # noqa: E501 - - Type of job condition. # noqa: E501 - - :return: The type of this KubeflowOrgV1JobCondition. # noqa: E501 - :rtype: str - """ - return self._type - - @type.setter - def type(self, type): - """Sets the type of this KubeflowOrgV1JobCondition. - - Type of job condition. # noqa: E501 - - :param type: The type of this KubeflowOrgV1JobCondition. 
# noqa: E501 - :type: str - """ - if self.local_vars_configuration.client_side_validation and type is None: # noqa: E501 - raise ValueError("Invalid value for `type`, must not be `None`") # noqa: E501 - - self._type = type - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1JobCondition): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1JobCondition): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_job_status.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_job_status.py deleted file mode 100644 index 3b1bf12573..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_job_status.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import 
Configuration - - -class KubeflowOrgV1JobStatus(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'completion_time': 'datetime', - 'conditions': 'list[KubeflowOrgV1JobCondition]', - 'last_reconcile_time': 'datetime', - 'replica_statuses': 'dict(str, KubeflowOrgV1ReplicaStatus)', - 'start_time': 'datetime' - } - - attribute_map = { - 'completion_time': 'completionTime', - 'conditions': 'conditions', - 'last_reconcile_time': 'lastReconcileTime', - 'replica_statuses': 'replicaStatuses', - 'start_time': 'startTime' - } - - def __init__(self, completion_time=None, conditions=None, last_reconcile_time=None, replica_statuses=None, start_time=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1JobStatus - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._completion_time = None - self._conditions = None - self._last_reconcile_time = None - self._replica_statuses = None - self._start_time = None - self.discriminator = None - - if completion_time is not None: - self.completion_time = completion_time - if conditions is not None: - self.conditions = conditions - if last_reconcile_time is not None: - self.last_reconcile_time = last_reconcile_time - if replica_statuses is not None: - self.replica_statuses = replica_statuses - if start_time is not None: - self.start_time = start_time - - @property - def completion_time(self): - """Gets the completion_time of this KubeflowOrgV1JobStatus. # noqa: E501 - - - :return: The completion_time of this KubeflowOrgV1JobStatus. 
# noqa: E501 - :rtype: datetime - """ - return self._completion_time - - @completion_time.setter - def completion_time(self, completion_time): - """Sets the completion_time of this KubeflowOrgV1JobStatus. - - - :param completion_time: The completion_time of this KubeflowOrgV1JobStatus. # noqa: E501 - :type: datetime - """ - - self._completion_time = completion_time - - @property - def conditions(self): - """Gets the conditions of this KubeflowOrgV1JobStatus. # noqa: E501 - - Conditions is an array of current observed job conditions. # noqa: E501 - - :return: The conditions of this KubeflowOrgV1JobStatus. # noqa: E501 - :rtype: list[KubeflowOrgV1JobCondition] - """ - return self._conditions - - @conditions.setter - def conditions(self, conditions): - """Sets the conditions of this KubeflowOrgV1JobStatus. - - Conditions is an array of current observed job conditions. # noqa: E501 - - :param conditions: The conditions of this KubeflowOrgV1JobStatus. # noqa: E501 - :type: list[KubeflowOrgV1JobCondition] - """ - - self._conditions = conditions - - @property - def last_reconcile_time(self): - """Gets the last_reconcile_time of this KubeflowOrgV1JobStatus. # noqa: E501 - - - :return: The last_reconcile_time of this KubeflowOrgV1JobStatus. # noqa: E501 - :rtype: datetime - """ - return self._last_reconcile_time - - @last_reconcile_time.setter - def last_reconcile_time(self, last_reconcile_time): - """Sets the last_reconcile_time of this KubeflowOrgV1JobStatus. - - - :param last_reconcile_time: The last_reconcile_time of this KubeflowOrgV1JobStatus. # noqa: E501 - :type: datetime - """ - - self._last_reconcile_time = last_reconcile_time - - @property - def replica_statuses(self): - """Gets the replica_statuses of this KubeflowOrgV1JobStatus. # noqa: E501 - - ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica. # noqa: E501 - - :return: The replica_statuses of this KubeflowOrgV1JobStatus. 
# noqa: E501 - :rtype: dict(str, KubeflowOrgV1ReplicaStatus) - """ - return self._replica_statuses - - @replica_statuses.setter - def replica_statuses(self, replica_statuses): - """Sets the replica_statuses of this KubeflowOrgV1JobStatus. - - ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica. # noqa: E501 - - :param replica_statuses: The replica_statuses of this KubeflowOrgV1JobStatus. # noqa: E501 - :type: dict(str, KubeflowOrgV1ReplicaStatus) - """ - - self._replica_statuses = replica_statuses - - @property - def start_time(self): - """Gets the start_time of this KubeflowOrgV1JobStatus. # noqa: E501 - - - :return: The start_time of this KubeflowOrgV1JobStatus. # noqa: E501 - :rtype: datetime - """ - return self._start_time - - @start_time.setter - def start_time(self, start_time): - """Sets the start_time of this KubeflowOrgV1JobStatus. - - - :param start_time: The start_time of this KubeflowOrgV1JobStatus. # noqa: E501 - :type: datetime - """ - - self._start_time = start_time - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1JobStatus): - return False - - return self.to_dict() == 
other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1JobStatus): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job.py deleted file mode 100644 index ecfe6a0e77..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1MPIJob(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. 
- """ - openapi_types = { - 'api_version': 'str', - 'kind': 'str', - 'metadata': 'V1ObjectMeta', - 'spec': 'KubeflowOrgV1MPIJobSpec', - 'status': 'KubeflowOrgV1JobStatus' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'kind': 'kind', - 'metadata': 'metadata', - 'spec': 'spec', - 'status': 'status' - } - - def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1MPIJob - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._kind = None - self._metadata = None - self._spec = None - self._status = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - if spec is not None: - self.spec = spec - if status is not None: - self.status = status - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1MPIJob. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1MPIJob. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1MPIJob. - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1MPIJob. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1MPIJob. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1MPIJob. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1MPIJob. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1MPIJob. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1MPIJob. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1MPIJob. # noqa: E501 - :rtype: V1ObjectMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1MPIJob. - - - :param metadata: The metadata of this KubeflowOrgV1MPIJob. # noqa: E501 - :type: V1ObjectMeta - """ - - self._metadata = metadata - - @property - def spec(self): - """Gets the spec of this KubeflowOrgV1MPIJob. # noqa: E501 - - - :return: The spec of this KubeflowOrgV1MPIJob. 
# noqa: E501 - :rtype: KubeflowOrgV1MPIJobSpec - """ - return self._spec - - @spec.setter - def spec(self, spec): - """Sets the spec of this KubeflowOrgV1MPIJob. - - - :param spec: The spec of this KubeflowOrgV1MPIJob. # noqa: E501 - :type: KubeflowOrgV1MPIJobSpec - """ - - self._spec = spec - - @property - def status(self): - """Gets the status of this KubeflowOrgV1MPIJob. # noqa: E501 - - - :return: The status of this KubeflowOrgV1MPIJob. # noqa: E501 - :rtype: KubeflowOrgV1JobStatus - """ - return self._status - - @status.setter - def status(self, status): - """Sets the status of this KubeflowOrgV1MPIJob. - - - :param status: The status of this KubeflowOrgV1MPIJob. # noqa: E501 - :type: KubeflowOrgV1JobStatus - """ - - self._status = status - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1MPIJob): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1MPIJob): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job_list.py 
b/sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job_list.py deleted file mode 100644 index eeff6bb7ab..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job_list.py +++ /dev/null @@ -1,203 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1MPIJobList(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'items': 'list[KubeflowOrgV1MPIJob]', - 'kind': 'str', - 'metadata': 'V1ListMeta' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'items': 'items', - 'kind': 'kind', - 'metadata': 'metadata' - } - - def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1MPIJobList - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._items = None - self._kind = None - self._metadata = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - self.items = items - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1MPIJobList. 
# noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1MPIJobList. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1MPIJobList. - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1MPIJobList. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def items(self): - """Gets the items of this KubeflowOrgV1MPIJobList. # noqa: E501 - - - :return: The items of this KubeflowOrgV1MPIJobList. # noqa: E501 - :rtype: list[KubeflowOrgV1MPIJob] - """ - return self._items - - @items.setter - def items(self, items): - """Sets the items of this KubeflowOrgV1MPIJobList. - - - :param items: The items of this KubeflowOrgV1MPIJobList. # noqa: E501 - :type: list[KubeflowOrgV1MPIJob] - """ - if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 - raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 - - self._items = items - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1MPIJobList. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1MPIJobList. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1MPIJobList. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1MPIJobList. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1MPIJobList. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1MPIJobList. # noqa: E501 - :rtype: V1ListMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1MPIJobList. - - - :param metadata: The metadata of this KubeflowOrgV1MPIJobList. 
# noqa: E501 - :type: V1ListMeta - """ - - self._metadata = metadata - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1MPIJobList): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1MPIJobList): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job_spec.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job_spec.py deleted file mode 100644 index 7ad3bba6e8..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_mpi_job_spec.py +++ /dev/null @@ -1,233 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1MPIJobSpec(object): - """NOTE: This class is auto generated by OpenAPI Generator. 
- Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'clean_pod_policy': 'str', - 'main_container': 'str', - 'mpi_replica_specs': 'dict(str, KubeflowOrgV1ReplicaSpec)', - 'run_policy': 'KubeflowOrgV1RunPolicy', - 'slots_per_worker': 'int' - } - - attribute_map = { - 'clean_pod_policy': 'cleanPodPolicy', - 'main_container': 'mainContainer', - 'mpi_replica_specs': 'mpiReplicaSpecs', - 'run_policy': 'runPolicy', - 'slots_per_worker': 'slotsPerWorker' - } - - def __init__(self, clean_pod_policy=None, main_container=None, mpi_replica_specs=None, run_policy=None, slots_per_worker=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1MPIJobSpec - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._clean_pod_policy = None - self._main_container = None - self._mpi_replica_specs = None - self._run_policy = None - self._slots_per_worker = None - self.discriminator = None - - if clean_pod_policy is not None: - self.clean_pod_policy = clean_pod_policy - if main_container is not None: - self.main_container = main_container - self.mpi_replica_specs = mpi_replica_specs - if run_policy is not None: - self.run_policy = run_policy - if slots_per_worker is not None: - self.slots_per_worker = slots_per_worker - - @property - def clean_pod_policy(self): - """Gets the clean_pod_policy of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - - CleanPodPolicy defines the policy that whether to kill pods after the job completes. Defaults to None. # noqa: E501 - - :return: The clean_pod_policy of this KubeflowOrgV1MPIJobSpec. 
# noqa: E501 - :rtype: str - """ - return self._clean_pod_policy - - @clean_pod_policy.setter - def clean_pod_policy(self, clean_pod_policy): - """Sets the clean_pod_policy of this KubeflowOrgV1MPIJobSpec. - - CleanPodPolicy defines the policy that whether to kill pods after the job completes. Defaults to None. # noqa: E501 - - :param clean_pod_policy: The clean_pod_policy of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - :type: str - """ - - self._clean_pod_policy = clean_pod_policy - - @property - def main_container(self): - """Gets the main_container of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - - MainContainer specifies name of the main container which executes the MPI code. # noqa: E501 - - :return: The main_container of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - :rtype: str - """ - return self._main_container - - @main_container.setter - def main_container(self, main_container): - """Sets the main_container of this KubeflowOrgV1MPIJobSpec. - - MainContainer specifies name of the main container which executes the MPI code. # noqa: E501 - - :param main_container: The main_container of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - :type: str - """ - - self._main_container = main_container - - @property - def mpi_replica_specs(self): - """Gets the mpi_replica_specs of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - - `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that specify the MPI replicas to run. # noqa: E501 - - :return: The mpi_replica_specs of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - :rtype: dict(str, KubeflowOrgV1ReplicaSpec) - """ - return self._mpi_replica_specs - - @mpi_replica_specs.setter - def mpi_replica_specs(self, mpi_replica_specs): - """Sets the mpi_replica_specs of this KubeflowOrgV1MPIJobSpec. - - `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that specify the MPI replicas to run. # noqa: E501 - - :param mpi_replica_specs: The mpi_replica_specs of this KubeflowOrgV1MPIJobSpec. 
# noqa: E501 - :type: dict(str, KubeflowOrgV1ReplicaSpec) - """ - if self.local_vars_configuration.client_side_validation and mpi_replica_specs is None: # noqa: E501 - raise ValueError("Invalid value for `mpi_replica_specs`, must not be `None`") # noqa: E501 - - self._mpi_replica_specs = mpi_replica_specs - - @property - def run_policy(self): - """Gets the run_policy of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - - - :return: The run_policy of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - :rtype: KubeflowOrgV1RunPolicy - """ - return self._run_policy - - @run_policy.setter - def run_policy(self, run_policy): - """Sets the run_policy of this KubeflowOrgV1MPIJobSpec. - - - :param run_policy: The run_policy of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - :type: KubeflowOrgV1RunPolicy - """ - - self._run_policy = run_policy - - @property - def slots_per_worker(self): - """Gets the slots_per_worker of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - - Specifies the number of slots per worker used in hostfile. Defaults to 1. # noqa: E501 - - :return: The slots_per_worker of this KubeflowOrgV1MPIJobSpec. # noqa: E501 - :rtype: int - """ - return self._slots_per_worker - - @slots_per_worker.setter - def slots_per_worker(self, slots_per_worker): - """Sets the slots_per_worker of this KubeflowOrgV1MPIJobSpec. - - Specifies the number of slots per worker used in hostfile. Defaults to 1. # noqa: E501 - - :param slots_per_worker: The slots_per_worker of this KubeflowOrgV1MPIJobSpec. 
# noqa: E501 - :type: int - """ - - self._slots_per_worker = slots_per_worker - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1MPIJobSpec): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1MPIJobSpec): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_elastic_policy.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_elastic_policy.py deleted file mode 100644 index e6b9b37e00..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_elastic_policy.py +++ /dev/null @@ -1,206 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1PaddleElasticPolicy(object): - """NOTE: This class is auto generated by OpenAPI 
Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'max_replicas': 'int', - 'max_restarts': 'int', - 'metrics': 'list[K8sIoApiAutoscalingV2MetricSpec]', - 'min_replicas': 'int' - } - - attribute_map = { - 'max_replicas': 'maxReplicas', - 'max_restarts': 'maxRestarts', - 'metrics': 'metrics', - 'min_replicas': 'minReplicas' - } - - def __init__(self, max_replicas=None, max_restarts=None, metrics=None, min_replicas=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1PaddleElasticPolicy - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._max_replicas = None - self._max_restarts = None - self._metrics = None - self._min_replicas = None - self.discriminator = None - - if max_replicas is not None: - self.max_replicas = max_replicas - if max_restarts is not None: - self.max_restarts = max_restarts - if metrics is not None: - self.metrics = metrics - if min_replicas is not None: - self.min_replicas = min_replicas - - @property - def max_replicas(self): - """Gets the max_replicas of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - - upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. # noqa: E501 - - :return: The max_replicas of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - :rtype: int - """ - return self._max_replicas - - @max_replicas.setter - def max_replicas(self, max_replicas): - """Sets the max_replicas of this KubeflowOrgV1PaddleElasticPolicy. 
- - upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. # noqa: E501 - - :param max_replicas: The max_replicas of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - :type: int - """ - - self._max_replicas = max_replicas - - @property - def max_restarts(self): - """Gets the max_restarts of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - - MaxRestarts is the limit for restart times of pods in elastic mode. # noqa: E501 - - :return: The max_restarts of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - :rtype: int - """ - return self._max_restarts - - @max_restarts.setter - def max_restarts(self, max_restarts): - """Sets the max_restarts of this KubeflowOrgV1PaddleElasticPolicy. - - MaxRestarts is the limit for restart times of pods in elastic mode. # noqa: E501 - - :param max_restarts: The max_restarts of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - :type: int - """ - - self._max_restarts = max_restarts - - @property - def metrics(self): - """Gets the metrics of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - - Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. See the individual metric source types for more information about how each type of metric must respond. If not set, the HPA will not be created. # noqa: E501 - - :return: The metrics of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - :rtype: list[K8sIoApiAutoscalingV2MetricSpec] - """ - return self._metrics - - @metrics.setter - def metrics(self, metrics): - """Sets the metrics of this KubeflowOrgV1PaddleElasticPolicy. 
- - Metrics contains the specifications which are used to calculate the desired replica count (the maximum replica count across all metrics will be used). The desired replica count is calculated with multiplying the ratio between the target value and the current value by the current number of pods. Ergo, metrics used must decrease as the pod count is increased, and vice-versa. See the individual metric source types for more information about how each type of metric must respond. If not set, the HPA will not be created. # noqa: E501 - - :param metrics: The metrics of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - :type: list[K8sIoApiAutoscalingV2MetricSpec] - """ - - self._metrics = metrics - - @property - def min_replicas(self): - """Gets the min_replicas of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - - minReplicas is the lower limit for the number of replicas to which the training job can scale down. It defaults to null. # noqa: E501 - - :return: The min_replicas of this KubeflowOrgV1PaddleElasticPolicy. # noqa: E501 - :rtype: int - """ - return self._min_replicas - - @min_replicas.setter - def min_replicas(self, min_replicas): - """Sets the min_replicas of this KubeflowOrgV1PaddleElasticPolicy. - - minReplicas is the lower limit for the number of replicas to which the training job can scale down. It defaults to null. # noqa: E501 - - :param min_replicas: The min_replicas of this KubeflowOrgV1PaddleElasticPolicy. 
# noqa: E501 - :type: int - """ - - self._min_replicas = min_replicas - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1PaddleElasticPolicy): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1PaddleElasticPolicy): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job.py deleted file mode 100644 index d02cd42bc0..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1PaddleJob(object): - """NOTE: This class is auto generated by OpenAPI Generator. 
- Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'kind': 'str', - 'metadata': 'V1ObjectMeta', - 'spec': 'KubeflowOrgV1PaddleJobSpec', - 'status': 'KubeflowOrgV1JobStatus' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'kind': 'kind', - 'metadata': 'metadata', - 'spec': 'spec', - 'status': 'status' - } - - def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1PaddleJob - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._kind = None - self._metadata = None - self._spec = None - self._status = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - if spec is not None: - self.spec = spec - if status is not None: - self.status = status - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1PaddleJob. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1PaddleJob. 
# noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1PaddleJob. - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1PaddleJob. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1PaddleJob. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1PaddleJob. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1PaddleJob. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1PaddleJob. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1PaddleJob. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1PaddleJob. # noqa: E501 - :rtype: V1ObjectMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1PaddleJob. 
- - - :param metadata: The metadata of this KubeflowOrgV1PaddleJob. # noqa: E501 - :type: V1ObjectMeta - """ - - self._metadata = metadata - - @property - def spec(self): - """Gets the spec of this KubeflowOrgV1PaddleJob. # noqa: E501 - - - :return: The spec of this KubeflowOrgV1PaddleJob. # noqa: E501 - :rtype: KubeflowOrgV1PaddleJobSpec - """ - return self._spec - - @spec.setter - def spec(self, spec): - """Sets the spec of this KubeflowOrgV1PaddleJob. - - - :param spec: The spec of this KubeflowOrgV1PaddleJob. # noqa: E501 - :type: KubeflowOrgV1PaddleJobSpec - """ - - self._spec = spec - - @property - def status(self): - """Gets the status of this KubeflowOrgV1PaddleJob. # noqa: E501 - - - :return: The status of this KubeflowOrgV1PaddleJob. # noqa: E501 - :rtype: KubeflowOrgV1JobStatus - """ - return self._status - - @status.setter - def status(self, status): - """Sets the status of this KubeflowOrgV1PaddleJob. - - - :param status: The status of this KubeflowOrgV1PaddleJob. # noqa: E501 - :type: KubeflowOrgV1JobStatus - """ - - self._status = status - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1PaddleJob): - return False - - return 
self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1PaddleJob): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job_list.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job_list.py deleted file mode 100644 index 0633c15e34..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job_list.py +++ /dev/null @@ -1,205 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1PaddleJobList(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. 
- """ - openapi_types = { - 'api_version': 'str', - 'items': 'list[KubeflowOrgV1PaddleJob]', - 'kind': 'str', - 'metadata': 'V1ListMeta' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'items': 'items', - 'kind': 'kind', - 'metadata': 'metadata' - } - - def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1PaddleJobList - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._items = None - self._kind = None - self._metadata = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - self.items = items - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1PaddleJobList. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1PaddleJobList. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1PaddleJobList. - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1PaddleJobList. 
# noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def items(self): - """Gets the items of this KubeflowOrgV1PaddleJobList. # noqa: E501 - - List of PaddleJobs. # noqa: E501 - - :return: The items of this KubeflowOrgV1PaddleJobList. # noqa: E501 - :rtype: list[KubeflowOrgV1PaddleJob] - """ - return self._items - - @items.setter - def items(self, items): - """Sets the items of this KubeflowOrgV1PaddleJobList. - - List of PaddleJobs. # noqa: E501 - - :param items: The items of this KubeflowOrgV1PaddleJobList. # noqa: E501 - :type: list[KubeflowOrgV1PaddleJob] - """ - if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 - raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 - - self._items = items - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1PaddleJobList. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1PaddleJobList. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1PaddleJobList. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1PaddleJobList. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1PaddleJobList. 
# noqa: E501 - - - :return: The metadata of this KubeflowOrgV1PaddleJobList. # noqa: E501 - :rtype: V1ListMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1PaddleJobList. - - - :param metadata: The metadata of this KubeflowOrgV1PaddleJobList. # noqa: E501 - :type: V1ListMeta - """ - - self._metadata = metadata - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1PaddleJobList): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1PaddleJobList): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job_spec.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job_spec.py deleted file mode 100644 index 3d897fbf92..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_paddle_job_spec.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the 
OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1PaddleJobSpec(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'elastic_policy': 'KubeflowOrgV1PaddleElasticPolicy', - 'paddle_replica_specs': 'dict(str, KubeflowOrgV1ReplicaSpec)', - 'run_policy': 'KubeflowOrgV1RunPolicy' - } - - attribute_map = { - 'elastic_policy': 'elasticPolicy', - 'paddle_replica_specs': 'paddleReplicaSpecs', - 'run_policy': 'runPolicy' - } - - def __init__(self, elastic_policy=None, paddle_replica_specs=None, run_policy=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1PaddleJobSpec - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._elastic_policy = None - self._paddle_replica_specs = None - self._run_policy = None - self.discriminator = None - - if elastic_policy is not None: - self.elastic_policy = elastic_policy - self.paddle_replica_specs = paddle_replica_specs - self.run_policy = run_policy - - @property - def elastic_policy(self): - """Gets the elastic_policy of this KubeflowOrgV1PaddleJobSpec. # noqa: E501 - - - :return: The elastic_policy of this KubeflowOrgV1PaddleJobSpec. # noqa: E501 - :rtype: KubeflowOrgV1PaddleElasticPolicy - """ - return self._elastic_policy - - @elastic_policy.setter - def elastic_policy(self, elastic_policy): - """Sets the elastic_policy of this KubeflowOrgV1PaddleJobSpec. 
- - - :param elastic_policy: The elastic_policy of this KubeflowOrgV1PaddleJobSpec. # noqa: E501 - :type: KubeflowOrgV1PaddleElasticPolicy - """ - - self._elastic_policy = elastic_policy - - @property - def paddle_replica_specs(self): - """Gets the paddle_replica_specs of this KubeflowOrgV1PaddleJobSpec. # noqa: E501 - - A map of PaddleReplicaType (type) to ReplicaSpec (value). Specifies the Paddle cluster configuration. For example, { \"Master\": PaddleReplicaSpec, \"Worker\": PaddleReplicaSpec, } # noqa: E501 - - :return: The paddle_replica_specs of this KubeflowOrgV1PaddleJobSpec. # noqa: E501 - :rtype: dict(str, KubeflowOrgV1ReplicaSpec) - """ - return self._paddle_replica_specs - - @paddle_replica_specs.setter - def paddle_replica_specs(self, paddle_replica_specs): - """Sets the paddle_replica_specs of this KubeflowOrgV1PaddleJobSpec. - - A map of PaddleReplicaType (type) to ReplicaSpec (value). Specifies the Paddle cluster configuration. For example, { \"Master\": PaddleReplicaSpec, \"Worker\": PaddleReplicaSpec, } # noqa: E501 - - :param paddle_replica_specs: The paddle_replica_specs of this KubeflowOrgV1PaddleJobSpec. # noqa: E501 - :type: dict(str, KubeflowOrgV1ReplicaSpec) - """ - if self.local_vars_configuration.client_side_validation and paddle_replica_specs is None: # noqa: E501 - raise ValueError("Invalid value for `paddle_replica_specs`, must not be `None`") # noqa: E501 - - self._paddle_replica_specs = paddle_replica_specs - - @property - def run_policy(self): - """Gets the run_policy of this KubeflowOrgV1PaddleJobSpec. # noqa: E501 - - - :return: The run_policy of this KubeflowOrgV1PaddleJobSpec. # noqa: E501 - :rtype: KubeflowOrgV1RunPolicy - """ - return self._run_policy - - @run_policy.setter - def run_policy(self, run_policy): - """Sets the run_policy of this KubeflowOrgV1PaddleJobSpec. - - - :param run_policy: The run_policy of this KubeflowOrgV1PaddleJobSpec. 
# noqa: E501 - :type: KubeflowOrgV1RunPolicy - """ - if self.local_vars_configuration.client_side_validation and run_policy is None: # noqa: E501 - raise ValueError("Invalid value for `run_policy`, must not be `None`") # noqa: E501 - - self._run_policy = run_policy - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1PaddleJobSpec): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1PaddleJobSpec): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job.py deleted file mode 100644 index b901b1d1d7..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - 
-from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1PyTorchJob(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'kind': 'str', - 'metadata': 'V1ObjectMeta', - 'spec': 'KubeflowOrgV1PyTorchJobSpec', - 'status': 'KubeflowOrgV1JobStatus' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'kind': 'kind', - 'metadata': 'metadata', - 'spec': 'spec', - 'status': 'status' - } - - def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1PyTorchJob - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._kind = None - self._metadata = None - self._spec = None - self._status = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - if spec is not None: - self.spec = spec - if status is not None: - self.status = status - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1PyTorchJob. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1PyTorchJob. 
# noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1PyTorchJob. - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1PyTorchJob. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1PyTorchJob. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1PyTorchJob. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1PyTorchJob. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1PyTorchJob. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1PyTorchJob. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1PyTorchJob. # noqa: E501 - :rtype: V1ObjectMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1PyTorchJob. 
- - - :param metadata: The metadata of this KubeflowOrgV1PyTorchJob. # noqa: E501 - :type: V1ObjectMeta - """ - - self._metadata = metadata - - @property - def spec(self): - """Gets the spec of this KubeflowOrgV1PyTorchJob. # noqa: E501 - - - :return: The spec of this KubeflowOrgV1PyTorchJob. # noqa: E501 - :rtype: KubeflowOrgV1PyTorchJobSpec - """ - return self._spec - - @spec.setter - def spec(self, spec): - """Sets the spec of this KubeflowOrgV1PyTorchJob. - - - :param spec: The spec of this KubeflowOrgV1PyTorchJob. # noqa: E501 - :type: KubeflowOrgV1PyTorchJobSpec - """ - - self._spec = spec - - @property - def status(self): - """Gets the status of this KubeflowOrgV1PyTorchJob. # noqa: E501 - - - :return: The status of this KubeflowOrgV1PyTorchJob. # noqa: E501 - :rtype: KubeflowOrgV1JobStatus - """ - return self._status - - @status.setter - def status(self, status): - """Sets the status of this KubeflowOrgV1PyTorchJob. - - - :param status: The status of this KubeflowOrgV1PyTorchJob. 
# noqa: E501 - :type: KubeflowOrgV1JobStatus - """ - - self._status = status - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1PyTorchJob): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1PyTorchJob): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job_list.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job_list.py deleted file mode 100644 index 650c15e8c4..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job_list.py +++ /dev/null @@ -1,205 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1PyTorchJobList(object): - """NOTE: This class is auto generated by OpenAPI Generator. 
- Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'items': 'list[KubeflowOrgV1PyTorchJob]', - 'kind': 'str', - 'metadata': 'V1ListMeta' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'items': 'items', - 'kind': 'kind', - 'metadata': 'metadata' - } - - def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1PyTorchJobList - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._items = None - self._kind = None - self._metadata = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - self.items = items - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1PyTorchJobList. - - APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def items(self): - """Gets the items of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - - List of PyTorchJobs. # noqa: E501 - - :return: The items of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - :rtype: list[KubeflowOrgV1PyTorchJob] - """ - return self._items - - @items.setter - def items(self, items): - """Sets the items of this KubeflowOrgV1PyTorchJobList. - - List of PyTorchJobs. # noqa: E501 - - :param items: The items of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - :type: list[KubeflowOrgV1PyTorchJob] - """ - if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 - raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 - - self._items = items - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1PyTorchJobList. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - :rtype: V1ListMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1PyTorchJobList. - - - :param metadata: The metadata of this KubeflowOrgV1PyTorchJobList. # noqa: E501 - :type: V1ListMeta - """ - - self._metadata = metadata - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1PyTorchJobList): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1PyTorchJobList): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job_spec.py 
b/sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job_spec.py deleted file mode 100644 index 9dedaca76a..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_py_torch_job_spec.py +++ /dev/null @@ -1,204 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1PyTorchJobSpec(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'elastic_policy': 'KubeflowOrgV1ElasticPolicy', - 'nproc_per_node': 'str', - 'pytorch_replica_specs': 'dict(str, KubeflowOrgV1ReplicaSpec)', - 'run_policy': 'KubeflowOrgV1RunPolicy' - } - - attribute_map = { - 'elastic_policy': 'elasticPolicy', - 'nproc_per_node': 'nprocPerNode', - 'pytorch_replica_specs': 'pytorchReplicaSpecs', - 'run_policy': 'runPolicy' - } - - def __init__(self, elastic_policy=None, nproc_per_node=None, pytorch_replica_specs=None, run_policy=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1PyTorchJobSpec - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._elastic_policy = None - self._nproc_per_node = None - self._pytorch_replica_specs = None - self._run_policy = None - self.discriminator = None - - if elastic_policy is not None: - self.elastic_policy = elastic_policy - if nproc_per_node is not None: - self.nproc_per_node = 
nproc_per_node - self.pytorch_replica_specs = pytorch_replica_specs - self.run_policy = run_policy - - @property - def elastic_policy(self): - """Gets the elastic_policy of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - - - :return: The elastic_policy of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - :rtype: KubeflowOrgV1ElasticPolicy - """ - return self._elastic_policy - - @elastic_policy.setter - def elastic_policy(self, elastic_policy): - """Sets the elastic_policy of this KubeflowOrgV1PyTorchJobSpec. - - - :param elastic_policy: The elastic_policy of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - :type: KubeflowOrgV1ElasticPolicy - """ - - self._elastic_policy = elastic_policy - - @property - def nproc_per_node(self): - """Gets the nproc_per_node of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - - Number of workers per node; supported values: [auto, cpu, gpu, int]. For more, https://github.com/pytorch/pytorch/blob/26f7f470df64d90e092081e39507e4ac751f55d6/torch/distributed/run.py#L629-L658. Defaults to auto. # noqa: E501 - - :return: The nproc_per_node of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - :rtype: str - """ - return self._nproc_per_node - - @nproc_per_node.setter - def nproc_per_node(self, nproc_per_node): - """Sets the nproc_per_node of this KubeflowOrgV1PyTorchJobSpec. - - Number of workers per node; supported values: [auto, cpu, gpu, int]. For more, https://github.com/pytorch/pytorch/blob/26f7f470df64d90e092081e39507e4ac751f55d6/torch/distributed/run.py#L629-L658. Defaults to auto. # noqa: E501 - - :param nproc_per_node: The nproc_per_node of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - :type: str - """ - - self._nproc_per_node = nproc_per_node - - @property - def pytorch_replica_specs(self): - """Gets the pytorch_replica_specs of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - - A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. 
For example, { \"Master\": PyTorchReplicaSpec, \"Worker\": PyTorchReplicaSpec, } # noqa: E501 - - :return: The pytorch_replica_specs of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - :rtype: dict(str, KubeflowOrgV1ReplicaSpec) - """ - return self._pytorch_replica_specs - - @pytorch_replica_specs.setter - def pytorch_replica_specs(self, pytorch_replica_specs): - """Sets the pytorch_replica_specs of this KubeflowOrgV1PyTorchJobSpec. - - A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. For example, { \"Master\": PyTorchReplicaSpec, \"Worker\": PyTorchReplicaSpec, } # noqa: E501 - - :param pytorch_replica_specs: The pytorch_replica_specs of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - :type: dict(str, KubeflowOrgV1ReplicaSpec) - """ - if self.local_vars_configuration.client_side_validation and pytorch_replica_specs is None: # noqa: E501 - raise ValueError("Invalid value for `pytorch_replica_specs`, must not be `None`") # noqa: E501 - - self._pytorch_replica_specs = pytorch_replica_specs - - @property - def run_policy(self): - """Gets the run_policy of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - - - :return: The run_policy of this KubeflowOrgV1PyTorchJobSpec. # noqa: E501 - :rtype: KubeflowOrgV1RunPolicy - """ - return self._run_policy - - @run_policy.setter - def run_policy(self, run_policy): - """Sets the run_policy of this KubeflowOrgV1PyTorchJobSpec. - - - :param run_policy: The run_policy of this KubeflowOrgV1PyTorchJobSpec. 
# noqa: E501 - :type: KubeflowOrgV1RunPolicy - """ - if self.local_vars_configuration.client_side_validation and run_policy is None: # noqa: E501 - raise ValueError("Invalid value for `run_policy`, must not be `None`") # noqa: E501 - - self._run_policy = run_policy - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1PyTorchJobSpec): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1PyTorchJobSpec): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_rdzv_conf.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_rdzv_conf.py deleted file mode 100644 index fd95efe18c..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_rdzv_conf.py +++ /dev/null @@ -1,146 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from 
kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1RDZVConf(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'key': 'str', - 'value': 'str' - } - - attribute_map = { - 'key': 'key', - 'value': 'value' - } - - def __init__(self, key=None, value=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1RDZVConf - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._key = None - self._value = None - self.discriminator = None - - if key is not None: - self.key = key - if value is not None: - self.value = value - - @property - def key(self): - """Gets the key of this KubeflowOrgV1RDZVConf. # noqa: E501 - - - :return: The key of this KubeflowOrgV1RDZVConf. # noqa: E501 - :rtype: str - """ - return self._key - - @key.setter - def key(self, key): - """Sets the key of this KubeflowOrgV1RDZVConf. - - - :param key: The key of this KubeflowOrgV1RDZVConf. # noqa: E501 - :type: str - """ - - self._key = key - - @property - def value(self): - """Gets the value of this KubeflowOrgV1RDZVConf. # noqa: E501 - - - :return: The value of this KubeflowOrgV1RDZVConf. # noqa: E501 - :rtype: str - """ - return self._value - - @value.setter - def value(self, value): - """Sets the value of this KubeflowOrgV1RDZVConf. - - - :param value: The value of this KubeflowOrgV1RDZVConf. 
# noqa: E501 - :type: str - """ - - self._value = value - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1RDZVConf): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1RDZVConf): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_replica_spec.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_replica_spec.py deleted file mode 100644 index c71a94955b..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_replica_spec.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1ReplicaSpec(object): - """NOTE: This class is auto generated by OpenAPI Generator. 
- Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'replicas': 'int', - 'restart_policy': 'str', - 'template': 'V1PodTemplateSpec' - } - - attribute_map = { - 'replicas': 'replicas', - 'restart_policy': 'restartPolicy', - 'template': 'template' - } - - def __init__(self, replicas=None, restart_policy=None, template=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1ReplicaSpec - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._replicas = None - self._restart_policy = None - self._template = None - self.discriminator = None - - if replicas is not None: - self.replicas = replicas - if restart_policy is not None: - self.restart_policy = restart_policy - if template is not None: - self.template = template - - @property - def replicas(self): - """Gets the replicas of this KubeflowOrgV1ReplicaSpec. # noqa: E501 - - Replicas is the desired number of replicas of the given template. If unspecified, defaults to 1. # noqa: E501 - - :return: The replicas of this KubeflowOrgV1ReplicaSpec. # noqa: E501 - :rtype: int - """ - return self._replicas - - @replicas.setter - def replicas(self, replicas): - """Sets the replicas of this KubeflowOrgV1ReplicaSpec. - - Replicas is the desired number of replicas of the given template. If unspecified, defaults to 1. # noqa: E501 - - :param replicas: The replicas of this KubeflowOrgV1ReplicaSpec. # noqa: E501 - :type: int - """ - - self._replicas = replicas - - @property - def restart_policy(self): - """Gets the restart_policy of this KubeflowOrgV1ReplicaSpec. # noqa: E501 - - Restart policy for all replicas within the job. 
One of Always, OnFailure, Never and ExitCode. Default to Never. # noqa: E501 - - :return: The restart_policy of this KubeflowOrgV1ReplicaSpec. # noqa: E501 - :rtype: str - """ - return self._restart_policy - - @restart_policy.setter - def restart_policy(self, restart_policy): - """Sets the restart_policy of this KubeflowOrgV1ReplicaSpec. - - Restart policy for all replicas within the job. One of Always, OnFailure, Never and ExitCode. Default to Never. # noqa: E501 - - :param restart_policy: The restart_policy of this KubeflowOrgV1ReplicaSpec. # noqa: E501 - :type: str - """ - - self._restart_policy = restart_policy - - @property - def template(self): - """Gets the template of this KubeflowOrgV1ReplicaSpec. # noqa: E501 - - - :return: The template of this KubeflowOrgV1ReplicaSpec. # noqa: E501 - :rtype: V1PodTemplateSpec - """ - return self._template - - @template.setter - def template(self, template): - """Sets the template of this KubeflowOrgV1ReplicaSpec. - - - :param template: The template of this KubeflowOrgV1ReplicaSpec. 
# noqa: E501 - :type: V1PodTemplateSpec - """ - - self._template = template - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1ReplicaSpec): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1ReplicaSpec): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_replica_status.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_replica_status.py deleted file mode 100644 index 0f960a2070..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_replica_status.py +++ /dev/null @@ -1,232 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1ReplicaStatus(object): - """NOTE: This class is auto generated by OpenAPI Generator. 
- Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'active': 'int', - 'failed': 'int', - 'label_selector': 'V1LabelSelector', - 'selector': 'str', - 'succeeded': 'int' - } - - attribute_map = { - 'active': 'active', - 'failed': 'failed', - 'label_selector': 'labelSelector', - 'selector': 'selector', - 'succeeded': 'succeeded' - } - - def __init__(self, active=None, failed=None, label_selector=None, selector=None, succeeded=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1ReplicaStatus - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._active = None - self._failed = None - self._label_selector = None - self._selector = None - self._succeeded = None - self.discriminator = None - - if active is not None: - self.active = active - if failed is not None: - self.failed = failed - if label_selector is not None: - self.label_selector = label_selector - if selector is not None: - self.selector = selector - if succeeded is not None: - self.succeeded = succeeded - - @property - def active(self): - """Gets the active of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - - The number of actively running pods. # noqa: E501 - - :return: The active of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - :rtype: int - """ - return self._active - - @active.setter - def active(self, active): - """Sets the active of this KubeflowOrgV1ReplicaStatus. - - The number of actively running pods. # noqa: E501 - - :param active: The active of this KubeflowOrgV1ReplicaStatus. 
# noqa: E501 - :type: int - """ - - self._active = active - - @property - def failed(self): - """Gets the failed of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - - The number of pods which reached phase Failed. # noqa: E501 - - :return: The failed of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - :rtype: int - """ - return self._failed - - @failed.setter - def failed(self, failed): - """Sets the failed of this KubeflowOrgV1ReplicaStatus. - - The number of pods which reached phase Failed. # noqa: E501 - - :param failed: The failed of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - :type: int - """ - - self._failed = failed - - @property - def label_selector(self): - """Gets the label_selector of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - - - :return: The label_selector of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - :rtype: V1LabelSelector - """ - return self._label_selector - - @label_selector.setter - def label_selector(self, label_selector): - """Sets the label_selector of this KubeflowOrgV1ReplicaStatus. - - - :param label_selector: The label_selector of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - :type: V1LabelSelector - """ - - self._label_selector = label_selector - - @property - def selector(self): - """Gets the selector of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - - A Selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty Selector matches all objects. A null Selector matches no objects. # noqa: E501 - - :return: The selector of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - :rtype: str - """ - return self._selector - - @selector.setter - def selector(self, selector): - """Sets the selector of this KubeflowOrgV1ReplicaStatus. - - A Selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty Selector matches all objects. A null Selector matches no objects. 
# noqa: E501 - - :param selector: The selector of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - :type: str - """ - - self._selector = selector - - @property - def succeeded(self): - """Gets the succeeded of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - - The number of pods which reached phase Succeeded. # noqa: E501 - - :return: The succeeded of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - :rtype: int - """ - return self._succeeded - - @succeeded.setter - def succeeded(self, succeeded): - """Sets the succeeded of this KubeflowOrgV1ReplicaStatus. - - The number of pods which reached phase Succeeded. # noqa: E501 - - :param succeeded: The succeeded of this KubeflowOrgV1ReplicaStatus. # noqa: E501 - :type: int - """ - - self._succeeded = succeeded - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1ReplicaStatus): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1ReplicaStatus): - return True - - return self.to_dict() != other.to_dict() diff --git 
a/sdk/python/kubeflow/training/models/kubeflow_org_v1_run_policy.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_run_policy.py deleted file mode 100644 index 7782720075..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_run_policy.py +++ /dev/null @@ -1,288 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1RunPolicy(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'active_deadline_seconds': 'int', - 'backoff_limit': 'int', - 'clean_pod_policy': 'str', - 'managed_by': 'str', - 'scheduling_policy': 'KubeflowOrgV1SchedulingPolicy', - 'suspend': 'bool', - 'ttl_seconds_after_finished': 'int' - } - - attribute_map = { - 'active_deadline_seconds': 'activeDeadlineSeconds', - 'backoff_limit': 'backoffLimit', - 'clean_pod_policy': 'cleanPodPolicy', - 'managed_by': 'managedBy', - 'scheduling_policy': 'schedulingPolicy', - 'suspend': 'suspend', - 'ttl_seconds_after_finished': 'ttlSecondsAfterFinished' - } - - def __init__(self, active_deadline_seconds=None, backoff_limit=None, clean_pod_policy=None, managed_by=None, scheduling_policy=None, suspend=None, ttl_seconds_after_finished=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1RunPolicy - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = 
local_vars_configuration - - self._active_deadline_seconds = None - self._backoff_limit = None - self._clean_pod_policy = None - self._managed_by = None - self._scheduling_policy = None - self._suspend = None - self._ttl_seconds_after_finished = None - self.discriminator = None - - if active_deadline_seconds is not None: - self.active_deadline_seconds = active_deadline_seconds - if backoff_limit is not None: - self.backoff_limit = backoff_limit - if clean_pod_policy is not None: - self.clean_pod_policy = clean_pod_policy - if managed_by is not None: - self.managed_by = managed_by - if scheduling_policy is not None: - self.scheduling_policy = scheduling_policy - if suspend is not None: - self.suspend = suspend - if ttl_seconds_after_finished is not None: - self.ttl_seconds_after_finished = ttl_seconds_after_finished - - @property - def active_deadline_seconds(self): - """Gets the active_deadline_seconds of this KubeflowOrgV1RunPolicy. # noqa: E501 - - Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer. # noqa: E501 - - :return: The active_deadline_seconds of this KubeflowOrgV1RunPolicy. # noqa: E501 - :rtype: int - """ - return self._active_deadline_seconds - - @active_deadline_seconds.setter - def active_deadline_seconds(self, active_deadline_seconds): - """Sets the active_deadline_seconds of this KubeflowOrgV1RunPolicy. - - Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer. # noqa: E501 - - :param active_deadline_seconds: The active_deadline_seconds of this KubeflowOrgV1RunPolicy. # noqa: E501 - :type: int - """ - - self._active_deadline_seconds = active_deadline_seconds - - @property - def backoff_limit(self): - """Gets the backoff_limit of this KubeflowOrgV1RunPolicy. # noqa: E501 - - Optional number of retries before marking this job failed. 
# noqa: E501 - - :return: The backoff_limit of this KubeflowOrgV1RunPolicy. # noqa: E501 - :rtype: int - """ - return self._backoff_limit - - @backoff_limit.setter - def backoff_limit(self, backoff_limit): - """Sets the backoff_limit of this KubeflowOrgV1RunPolicy. - - Optional number of retries before marking this job failed. # noqa: E501 - - :param backoff_limit: The backoff_limit of this KubeflowOrgV1RunPolicy. # noqa: E501 - :type: int - """ - - self._backoff_limit = backoff_limit - - @property - def clean_pod_policy(self): - """Gets the clean_pod_policy of this KubeflowOrgV1RunPolicy. # noqa: E501 - - CleanPodPolicy defines the policy to kill pods after the job completes. Default to None. # noqa: E501 - - :return: The clean_pod_policy of this KubeflowOrgV1RunPolicy. # noqa: E501 - :rtype: str - """ - return self._clean_pod_policy - - @clean_pod_policy.setter - def clean_pod_policy(self, clean_pod_policy): - """Sets the clean_pod_policy of this KubeflowOrgV1RunPolicy. - - CleanPodPolicy defines the policy to kill pods after the job completes. Default to None. # noqa: E501 - - :param clean_pod_policy: The clean_pod_policy of this KubeflowOrgV1RunPolicy. # noqa: E501 - :type: str - """ - - self._clean_pod_policy = clean_pod_policy - - @property - def managed_by(self): - """Gets the managed_by of this KubeflowOrgV1RunPolicy. # noqa: E501 - - ManagedBy is used to indicate the controller or entity that manages a job. The value must be either an empty, 'kubeflow.org/training-operator' or 'kueue.x-k8s.io/multikueue'. The training-operator reconciles a job which doesn't have this field at all or the field value is the reserved string 'kubeflow.org/training-operator', but delegates reconciling the job with 'kueue.x-k8s.io/multikueue' to the Kueue. The field is immutable. # noqa: E501 - - :return: The managed_by of this KubeflowOrgV1RunPolicy. 
# noqa: E501 - :rtype: str - """ - return self._managed_by - - @managed_by.setter - def managed_by(self, managed_by): - """Sets the managed_by of this KubeflowOrgV1RunPolicy. - - ManagedBy is used to indicate the controller or entity that manages a job. The value must be either an empty, 'kubeflow.org/training-operator' or 'kueue.x-k8s.io/multikueue'. The training-operator reconciles a job which doesn't have this field at all or the field value is the reserved string 'kubeflow.org/training-operator', but delegates reconciling the job with 'kueue.x-k8s.io/multikueue' to the Kueue. The field is immutable. # noqa: E501 - - :param managed_by: The managed_by of this KubeflowOrgV1RunPolicy. # noqa: E501 - :type: str - """ - - self._managed_by = managed_by - - @property - def scheduling_policy(self): - """Gets the scheduling_policy of this KubeflowOrgV1RunPolicy. # noqa: E501 - - - :return: The scheduling_policy of this KubeflowOrgV1RunPolicy. # noqa: E501 - :rtype: KubeflowOrgV1SchedulingPolicy - """ - return self._scheduling_policy - - @scheduling_policy.setter - def scheduling_policy(self, scheduling_policy): - """Sets the scheduling_policy of this KubeflowOrgV1RunPolicy. - - - :param scheduling_policy: The scheduling_policy of this KubeflowOrgV1RunPolicy. # noqa: E501 - :type: KubeflowOrgV1SchedulingPolicy - """ - - self._scheduling_policy = scheduling_policy - - @property - def suspend(self): - """Gets the suspend of this KubeflowOrgV1RunPolicy. # noqa: E501 - - suspend specifies whether the Job controller should create Pods or not. If a Job is created with suspend set to true, no Pods are created by the Job controller. If a Job is suspended after creation (i.e. the flag goes from false to true), the Job controller will delete all active Pods and PodGroups associated with this Job. Users must design their workload to gracefully handle this. Suspending a Job will reset the StartTime field of the Job. Defaults to false. 
# noqa: E501 - - :return: The suspend of this KubeflowOrgV1RunPolicy. # noqa: E501 - :rtype: bool - """ - return self._suspend - - @suspend.setter - def suspend(self, suspend): - """Sets the suspend of this KubeflowOrgV1RunPolicy. - - suspend specifies whether the Job controller should create Pods or not. If a Job is created with suspend set to true, no Pods are created by the Job controller. If a Job is suspended after creation (i.e. the flag goes from false to true), the Job controller will delete all active Pods and PodGroups associated with this Job. Users must design their workload to gracefully handle this. Suspending a Job will reset the StartTime field of the Job. Defaults to false. # noqa: E501 - - :param suspend: The suspend of this KubeflowOrgV1RunPolicy. # noqa: E501 - :type: bool - """ - - self._suspend = suspend - - @property - def ttl_seconds_after_finished(self): - """Gets the ttl_seconds_after_finished of this KubeflowOrgV1RunPolicy. # noqa: E501 - - TTLSecondsAfterFinished is the TTL to clean up jobs. It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Default to infinite. # noqa: E501 - - :return: The ttl_seconds_after_finished of this KubeflowOrgV1RunPolicy. # noqa: E501 - :rtype: int - """ - return self._ttl_seconds_after_finished - - @ttl_seconds_after_finished.setter - def ttl_seconds_after_finished(self, ttl_seconds_after_finished): - """Sets the ttl_seconds_after_finished of this KubeflowOrgV1RunPolicy. - - TTLSecondsAfterFinished is the TTL to clean up jobs. It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Default to infinite. # noqa: E501 - - :param ttl_seconds_after_finished: The ttl_seconds_after_finished of this KubeflowOrgV1RunPolicy. 
# noqa: E501 - :type: int - """ - - self._ttl_seconds_after_finished = ttl_seconds_after_finished - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1RunPolicy): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1RunPolicy): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_scheduling_policy.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_scheduling_policy.py deleted file mode 100644 index 16a1d9acdd..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_scheduling_policy.py +++ /dev/null @@ -1,224 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1SchedulingPolicy(object): - """NOTE: This class is auto generated by OpenAPI 
Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'min_available': 'int', - 'min_resources': 'dict(str, Quantity)', - 'priority_class': 'str', - 'queue': 'str', - 'schedule_timeout_seconds': 'int' - } - - attribute_map = { - 'min_available': 'minAvailable', - 'min_resources': 'minResources', - 'priority_class': 'priorityClass', - 'queue': 'queue', - 'schedule_timeout_seconds': 'scheduleTimeoutSeconds' - } - - def __init__(self, min_available=None, min_resources=None, priority_class=None, queue=None, schedule_timeout_seconds=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1SchedulingPolicy - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._min_available = None - self._min_resources = None - self._priority_class = None - self._queue = None - self._schedule_timeout_seconds = None - self.discriminator = None - - if min_available is not None: - self.min_available = min_available - if min_resources is not None: - self.min_resources = min_resources - if priority_class is not None: - self.priority_class = priority_class - if queue is not None: - self.queue = queue - if schedule_timeout_seconds is not None: - self.schedule_timeout_seconds = schedule_timeout_seconds - - @property - def min_available(self): - """Gets the min_available of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - - - :return: The min_available of this KubeflowOrgV1SchedulingPolicy. 
# noqa: E501 - :rtype: int - """ - return self._min_available - - @min_available.setter - def min_available(self, min_available): - """Sets the min_available of this KubeflowOrgV1SchedulingPolicy. - - - :param min_available: The min_available of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - :type: int - """ - - self._min_available = min_available - - @property - def min_resources(self): - """Gets the min_resources of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - - - :return: The min_resources of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - :rtype: dict(str, Quantity) - """ - return self._min_resources - - @min_resources.setter - def min_resources(self, min_resources): - """Sets the min_resources of this KubeflowOrgV1SchedulingPolicy. - - - :param min_resources: The min_resources of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - :type: dict(str, Quantity) - """ - - self._min_resources = min_resources - - @property - def priority_class(self): - """Gets the priority_class of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - - - :return: The priority_class of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - :rtype: str - """ - return self._priority_class - - @priority_class.setter - def priority_class(self, priority_class): - """Sets the priority_class of this KubeflowOrgV1SchedulingPolicy. - - - :param priority_class: The priority_class of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - :type: str - """ - - self._priority_class = priority_class - - @property - def queue(self): - """Gets the queue of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - - - :return: The queue of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - :rtype: str - """ - return self._queue - - @queue.setter - def queue(self, queue): - """Sets the queue of this KubeflowOrgV1SchedulingPolicy. - - - :param queue: The queue of this KubeflowOrgV1SchedulingPolicy. 
# noqa: E501 - :type: str - """ - - self._queue = queue - - @property - def schedule_timeout_seconds(self): - """Gets the schedule_timeout_seconds of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - - - :return: The schedule_timeout_seconds of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - :rtype: int - """ - return self._schedule_timeout_seconds - - @schedule_timeout_seconds.setter - def schedule_timeout_seconds(self, schedule_timeout_seconds): - """Sets the schedule_timeout_seconds of this KubeflowOrgV1SchedulingPolicy. - - - :param schedule_timeout_seconds: The schedule_timeout_seconds of this KubeflowOrgV1SchedulingPolicy. # noqa: E501 - :type: int - """ - - self._schedule_timeout_seconds = schedule_timeout_seconds - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1SchedulingPolicy): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1SchedulingPolicy): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job.py 
b/sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job.py deleted file mode 100644 index b8e4508db5..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1TFJob(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'kind': 'str', - 'metadata': 'V1ObjectMeta', - 'spec': 'KubeflowOrgV1TFJobSpec', - 'status': 'KubeflowOrgV1JobStatus' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'kind': 'kind', - 'metadata': 'metadata', - 'spec': 'spec', - 'status': 'status' - } - - def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1TFJob - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._kind = None - self._metadata = None - self._spec = None - self._status = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - if spec is not None: - self.spec = spec - if status is not None: - self.status = status - - @property - def 
api_version(self): - """Gets the api_version of this KubeflowOrgV1TFJob. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1TFJob. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1TFJob. - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1TFJob. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1TFJob. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1TFJob. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1TFJob. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1TFJob. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1TFJob. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1TFJob. # noqa: E501 - :rtype: V1ObjectMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1TFJob. - - - :param metadata: The metadata of this KubeflowOrgV1TFJob. # noqa: E501 - :type: V1ObjectMeta - """ - - self._metadata = metadata - - @property - def spec(self): - """Gets the spec of this KubeflowOrgV1TFJob. # noqa: E501 - - - :return: The spec of this KubeflowOrgV1TFJob. # noqa: E501 - :rtype: KubeflowOrgV1TFJobSpec - """ - return self._spec - - @spec.setter - def spec(self, spec): - """Sets the spec of this KubeflowOrgV1TFJob. - - - :param spec: The spec of this KubeflowOrgV1TFJob. # noqa: E501 - :type: KubeflowOrgV1TFJobSpec - """ - - self._spec = spec - - @property - def status(self): - """Gets the status of this KubeflowOrgV1TFJob. # noqa: E501 - - - :return: The status of this KubeflowOrgV1TFJob. # noqa: E501 - :rtype: KubeflowOrgV1JobStatus - """ - return self._status - - @status.setter - def status(self, status): - """Sets the status of this KubeflowOrgV1TFJob. - - - :param status: The status of this KubeflowOrgV1TFJob. 
# noqa: E501 - :type: KubeflowOrgV1JobStatus - """ - - self._status = status - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1TFJob): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1TFJob): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job_list.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job_list.py deleted file mode 100644 index dfaf863b7a..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job_list.py +++ /dev/null @@ -1,205 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1TFJobList(object): - """NOTE: This class is auto generated by OpenAPI Generator. 
- Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'items': 'list[KubeflowOrgV1TFJob]', - 'kind': 'str', - 'metadata': 'V1ListMeta' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'items': 'items', - 'kind': 'kind', - 'metadata': 'metadata' - } - - def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1TFJobList - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._items = None - self._kind = None - self._metadata = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - self.items = items - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1TFJobList. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1TFJobList. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1TFJobList. - - APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1TFJobList. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def items(self): - """Gets the items of this KubeflowOrgV1TFJobList. # noqa: E501 - - List of TFJobs. # noqa: E501 - - :return: The items of this KubeflowOrgV1TFJobList. # noqa: E501 - :rtype: list[KubeflowOrgV1TFJob] - """ - return self._items - - @items.setter - def items(self, items): - """Sets the items of this KubeflowOrgV1TFJobList. - - List of TFJobs. # noqa: E501 - - :param items: The items of this KubeflowOrgV1TFJobList. # noqa: E501 - :type: list[KubeflowOrgV1TFJob] - """ - if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 - raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 - - self._items = items - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1TFJobList. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1TFJobList. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1TFJobList. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1TFJobList. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1TFJobList. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1TFJobList. # noqa: E501 - :rtype: V1ListMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1TFJobList. - - - :param metadata: The metadata of this KubeflowOrgV1TFJobList. # noqa: E501 - :type: V1ListMeta - """ - - self._metadata = metadata - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1TFJobList): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1TFJobList): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job_spec.py 
b/sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job_spec.py deleted file mode 100644 index 60076a0a86..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_tf_job_spec.py +++ /dev/null @@ -1,206 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1TFJobSpec(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'enable_dynamic_worker': 'bool', - 'run_policy': 'KubeflowOrgV1RunPolicy', - 'success_policy': 'str', - 'tf_replica_specs': 'dict(str, KubeflowOrgV1ReplicaSpec)' - } - - attribute_map = { - 'enable_dynamic_worker': 'enableDynamicWorker', - 'run_policy': 'runPolicy', - 'success_policy': 'successPolicy', - 'tf_replica_specs': 'tfReplicaSpecs' - } - - def __init__(self, enable_dynamic_worker=None, run_policy=None, success_policy=None, tf_replica_specs=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1TFJobSpec - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._enable_dynamic_worker = None - self._run_policy = None - self._success_policy = None - self._tf_replica_specs = None - self.discriminator = None - - if enable_dynamic_worker is not None: - self.enable_dynamic_worker = enable_dynamic_worker - self.run_policy = run_policy - if success_policy is not None: - 
self.success_policy = success_policy - self.tf_replica_specs = tf_replica_specs - - @property - def enable_dynamic_worker(self): - """Gets the enable_dynamic_worker of this KubeflowOrgV1TFJobSpec. # noqa: E501 - - A switch to enable dynamic worker # noqa: E501 - - :return: The enable_dynamic_worker of this KubeflowOrgV1TFJobSpec. # noqa: E501 - :rtype: bool - """ - return self._enable_dynamic_worker - - @enable_dynamic_worker.setter - def enable_dynamic_worker(self, enable_dynamic_worker): - """Sets the enable_dynamic_worker of this KubeflowOrgV1TFJobSpec. - - A switch to enable dynamic worker # noqa: E501 - - :param enable_dynamic_worker: The enable_dynamic_worker of this KubeflowOrgV1TFJobSpec. # noqa: E501 - :type: bool - """ - - self._enable_dynamic_worker = enable_dynamic_worker - - @property - def run_policy(self): - """Gets the run_policy of this KubeflowOrgV1TFJobSpec. # noqa: E501 - - - :return: The run_policy of this KubeflowOrgV1TFJobSpec. # noqa: E501 - :rtype: KubeflowOrgV1RunPolicy - """ - return self._run_policy - - @run_policy.setter - def run_policy(self, run_policy): - """Sets the run_policy of this KubeflowOrgV1TFJobSpec. - - - :param run_policy: The run_policy of this KubeflowOrgV1TFJobSpec. # noqa: E501 - :type: KubeflowOrgV1RunPolicy - """ - if self.local_vars_configuration.client_side_validation and run_policy is None: # noqa: E501 - raise ValueError("Invalid value for `run_policy`, must not be `None`") # noqa: E501 - - self._run_policy = run_policy - - @property - def success_policy(self): - """Gets the success_policy of this KubeflowOrgV1TFJobSpec. # noqa: E501 - - SuccessPolicy defines the policy to mark the TFJob as succeeded. Default to \"\", using the default rules. # noqa: E501 - - :return: The success_policy of this KubeflowOrgV1TFJobSpec. # noqa: E501 - :rtype: str - """ - return self._success_policy - - @success_policy.setter - def success_policy(self, success_policy): - """Sets the success_policy of this KubeflowOrgV1TFJobSpec. 
- - SuccessPolicy defines the policy to mark the TFJob as succeeded. Default to \"\", using the default rules. # noqa: E501 - - :param success_policy: The success_policy of this KubeflowOrgV1TFJobSpec. # noqa: E501 - :type: str - """ - - self._success_policy = success_policy - - @property - def tf_replica_specs(self): - """Gets the tf_replica_specs of this KubeflowOrgV1TFJobSpec. # noqa: E501 - - A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. For example, { \"PS\": ReplicaSpec, \"Worker\": ReplicaSpec, } # noqa: E501 - - :return: The tf_replica_specs of this KubeflowOrgV1TFJobSpec. # noqa: E501 - :rtype: dict(str, KubeflowOrgV1ReplicaSpec) - """ - return self._tf_replica_specs - - @tf_replica_specs.setter - def tf_replica_specs(self, tf_replica_specs): - """Sets the tf_replica_specs of this KubeflowOrgV1TFJobSpec. - - A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. For example, { \"PS\": ReplicaSpec, \"Worker\": ReplicaSpec, } # noqa: E501 - - :param tf_replica_specs: The tf_replica_specs of this KubeflowOrgV1TFJobSpec. 
# noqa: E501 - :type: dict(str, KubeflowOrgV1ReplicaSpec) - """ - if self.local_vars_configuration.client_side_validation and tf_replica_specs is None: # noqa: E501 - raise ValueError("Invalid value for `tf_replica_specs`, must not be `None`") # noqa: E501 - - self._tf_replica_specs = tf_replica_specs - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1TFJobSpec): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1TFJobSpec): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job.py deleted file mode 100644 index c644e56cb8..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # 
noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1XGBoostJob(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'kind': 'str', - 'metadata': 'V1ObjectMeta', - 'spec': 'KubeflowOrgV1XGBoostJobSpec', - 'status': 'KubeflowOrgV1JobStatus' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'kind': 'kind', - 'metadata': 'metadata', - 'spec': 'spec', - 'status': 'status' - } - - def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1XGBoostJob - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._kind = None - self._metadata = None - self._spec = None - self._status = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - if spec is not None: - self.spec = spec - if status is not None: - self.status = status - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1XGBoostJob. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1XGBoostJob. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1XGBoostJob. - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1XGBoostJob. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1XGBoostJob. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1XGBoostJob. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1XGBoostJob. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1XGBoostJob. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1XGBoostJob. 
# noqa: E501 - - - :return: The metadata of this KubeflowOrgV1XGBoostJob. # noqa: E501 - :rtype: V1ObjectMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1XGBoostJob. - - - :param metadata: The metadata of this KubeflowOrgV1XGBoostJob. # noqa: E501 - :type: V1ObjectMeta - """ - - self._metadata = metadata - - @property - def spec(self): - """Gets the spec of this KubeflowOrgV1XGBoostJob. # noqa: E501 - - - :return: The spec of this KubeflowOrgV1XGBoostJob. # noqa: E501 - :rtype: KubeflowOrgV1XGBoostJobSpec - """ - return self._spec - - @spec.setter - def spec(self, spec): - """Sets the spec of this KubeflowOrgV1XGBoostJob. - - - :param spec: The spec of this KubeflowOrgV1XGBoostJob. # noqa: E501 - :type: KubeflowOrgV1XGBoostJobSpec - """ - - self._spec = spec - - @property - def status(self): - """Gets the status of this KubeflowOrgV1XGBoostJob. # noqa: E501 - - - :return: The status of this KubeflowOrgV1XGBoostJob. # noqa: E501 - :rtype: KubeflowOrgV1JobStatus - """ - return self._status - - @status.setter - def status(self, status): - """Sets the status of this KubeflowOrgV1XGBoostJob. - - - :param status: The status of this KubeflowOrgV1XGBoostJob. 
# noqa: E501 - :type: KubeflowOrgV1JobStatus - """ - - self._status = status - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1XGBoostJob): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1XGBoostJob): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job_list.py b/sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job_list.py deleted file mode 100644 index 83c017c2cf..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job_list.py +++ /dev/null @@ -1,203 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1XGBoostJobList(object): - """NOTE: This class is auto generated by OpenAPI Generator. 
- Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'api_version': 'str', - 'items': 'list[KubeflowOrgV1XGBoostJob]', - 'kind': 'str', - 'metadata': 'V1ListMeta' - } - - attribute_map = { - 'api_version': 'apiVersion', - 'items': 'items', - 'kind': 'kind', - 'metadata': 'metadata' - } - - def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1XGBoostJobList - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._api_version = None - self._items = None - self._kind = None - self._metadata = None - self.discriminator = None - - if api_version is not None: - self.api_version = api_version - self.items = items - if kind is not None: - self.kind = kind - if metadata is not None: - self.metadata = metadata - - @property - def api_version(self): - """Gets the api_version of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :return: The api_version of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - :rtype: str - """ - return self._api_version - - @api_version.setter - def api_version(self, api_version): - """Sets the api_version of this KubeflowOrgV1XGBoostJobList. - - APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 - - :param api_version: The api_version of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - :type: str - """ - - self._api_version = api_version - - @property - def items(self): - """Gets the items of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - - - :return: The items of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - :rtype: list[KubeflowOrgV1XGBoostJob] - """ - return self._items - - @items.setter - def items(self, items): - """Sets the items of this KubeflowOrgV1XGBoostJobList. - - - :param items: The items of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - :type: list[KubeflowOrgV1XGBoostJob] - """ - if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 - raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 - - self._items = items - - @property - def kind(self): - """Gets the kind of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :return: The kind of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - :rtype: str - """ - return self._kind - - @kind.setter - def kind(self, kind): - """Sets the kind of this KubeflowOrgV1XGBoostJobList. - - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 - - :param kind: The kind of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - :type: str - """ - - self._kind = kind - - @property - def metadata(self): - """Gets the metadata of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - - - :return: The metadata of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - :rtype: V1ListMeta - """ - return self._metadata - - @metadata.setter - def metadata(self, metadata): - """Sets the metadata of this KubeflowOrgV1XGBoostJobList. - - - :param metadata: The metadata of this KubeflowOrgV1XGBoostJobList. # noqa: E501 - :type: V1ListMeta - """ - - self._metadata = metadata - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1XGBoostJobList): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1XGBoostJobList): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job_spec.py 
b/sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job_spec.py deleted file mode 100644 index 05e3c2e476..0000000000 --- a/sdk/python/kubeflow/training/models/kubeflow_org_v1_xg_boost_job_spec.py +++ /dev/null @@ -1,148 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.training.configuration import Configuration - - -class KubeflowOrgV1XGBoostJobSpec(object): - """NOTE: This class is auto generated by OpenAPI Generator. - Ref: https://openapi-generator.tech - - Do not edit the class manually. - """ - - """ - Attributes: - openapi_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - openapi_types = { - 'run_policy': 'KubeflowOrgV1RunPolicy', - 'xgb_replica_specs': 'dict(str, KubeflowOrgV1ReplicaSpec)' - } - - attribute_map = { - 'run_policy': 'runPolicy', - 'xgb_replica_specs': 'xgbReplicaSpecs' - } - - def __init__(self, run_policy=None, xgb_replica_specs=None, local_vars_configuration=None): # noqa: E501 - """KubeflowOrgV1XGBoostJobSpec - a model defined in OpenAPI""" # noqa: E501 - if local_vars_configuration is None: - local_vars_configuration = Configuration() - self.local_vars_configuration = local_vars_configuration - - self._run_policy = None - self._xgb_replica_specs = None - self.discriminator = None - - self.run_policy = run_policy - self.xgb_replica_specs = xgb_replica_specs - - @property - def run_policy(self): - """Gets the run_policy of this KubeflowOrgV1XGBoostJobSpec. # noqa: E501 - - - :return: The run_policy of this KubeflowOrgV1XGBoostJobSpec. 
# noqa: E501 - :rtype: KubeflowOrgV1RunPolicy - """ - return self._run_policy - - @run_policy.setter - def run_policy(self, run_policy): - """Sets the run_policy of this KubeflowOrgV1XGBoostJobSpec. - - - :param run_policy: The run_policy of this KubeflowOrgV1XGBoostJobSpec. # noqa: E501 - :type: KubeflowOrgV1RunPolicy - """ - if self.local_vars_configuration.client_side_validation and run_policy is None: # noqa: E501 - raise ValueError("Invalid value for `run_policy`, must not be `None`") # noqa: E501 - - self._run_policy = run_policy - - @property - def xgb_replica_specs(self): - """Gets the xgb_replica_specs of this KubeflowOrgV1XGBoostJobSpec. # noqa: E501 - - - :return: The xgb_replica_specs of this KubeflowOrgV1XGBoostJobSpec. # noqa: E501 - :rtype: dict(str, KubeflowOrgV1ReplicaSpec) - """ - return self._xgb_replica_specs - - @xgb_replica_specs.setter - def xgb_replica_specs(self, xgb_replica_specs): - """Sets the xgb_replica_specs of this KubeflowOrgV1XGBoostJobSpec. - - - :param xgb_replica_specs: The xgb_replica_specs of this KubeflowOrgV1XGBoostJobSpec. 
# noqa: E501 - :type: dict(str, KubeflowOrgV1ReplicaSpec) - """ - if self.local_vars_configuration.client_side_validation and xgb_replica_specs is None: # noqa: E501 - raise ValueError("Invalid value for `xgb_replica_specs`, must not be `None`") # noqa: E501 - - self._xgb_replica_specs = xgb_replica_specs - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.openapi_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, KubeflowOrgV1XGBoostJobSpec): - return False - - return self.to_dict() == other.to_dict() - - def __ne__(self, other): - """Returns true if both objects are not equal""" - if not isinstance(other, KubeflowOrgV1XGBoostJobSpec): - return True - - return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/rest.py b/sdk/python/kubeflow/training/rest.py deleted file mode 100644 index 65e51c9c43..0000000000 --- a/sdk/python/kubeflow/training/rest.py +++ /dev/null @@ -1,291 +0,0 @@ -# coding: utf-8 - -""" - Kubeflow Training SDK - - Python SDK for Kubeflow Training # noqa: E501 - - The version of the OpenAPI document: v1.7.0 - Generated by: https://openapi-generator.tech -""" - - -from __future__ import absolute_import - -import io -import json -import logging -import re -import ssl - 
-import certifi -# python 2 and python 3 compatibility library -import six -from six.moves.urllib.parse import urlencode -import urllib3 - -from kubeflow.training.exceptions import ApiException, ApiValueError - - -logger = logging.getLogger(__name__) - - -class RESTResponse(io.IOBase): - - def __init__(self, resp): - self.urllib3_response = resp - self.status = resp.status - self.reason = resp.reason - self.data = resp.data - - def getheaders(self): - """Returns a dictionary of the response headers.""" - return self.urllib3_response.getheaders() - - def getheader(self, name, default=None): - """Returns a given response header.""" - return self.urllib3_response.getheader(name, default) - - -class RESTClientObject(object): - - def __init__(self, configuration, pools_size=4, maxsize=None): - # urllib3.PoolManager will pass all kw parameters to connectionpool - # https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/poolmanager.py#L75 # noqa: E501 - # https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/connectionpool.py#L680 # noqa: E501 - # maxsize is the number of requests to host that are allowed in parallel # noqa: E501 - # Custom SSL certificates and client certificates: http://urllib3.readthedocs.io/en/latest/advanced-usage.html # noqa: E501 - - # cert_reqs - if configuration.verify_ssl: - cert_reqs = ssl.CERT_REQUIRED - else: - cert_reqs = ssl.CERT_NONE - - # ca_certs - if configuration.ssl_ca_cert: - ca_certs = configuration.ssl_ca_cert - else: - # if not set certificate file, use Mozilla's root certificates. 
- ca_certs = certifi.where() - - addition_pool_args = {} - if configuration.assert_hostname is not None: - addition_pool_args['assert_hostname'] = configuration.assert_hostname # noqa: E501 - - if configuration.retries is not None: - addition_pool_args['retries'] = configuration.retries - - if maxsize is None: - if configuration.connection_pool_maxsize is not None: - maxsize = configuration.connection_pool_maxsize - else: - maxsize = 4 - - # https pool manager - if configuration.proxy: - self.pool_manager = urllib3.ProxyManager( - num_pools=pools_size, - maxsize=maxsize, - cert_reqs=cert_reqs, - ca_certs=ca_certs, - cert_file=configuration.cert_file, - key_file=configuration.key_file, - proxy_url=configuration.proxy, - proxy_headers=configuration.proxy_headers, - **addition_pool_args - ) - else: - self.pool_manager = urllib3.PoolManager( - num_pools=pools_size, - maxsize=maxsize, - cert_reqs=cert_reqs, - ca_certs=ca_certs, - cert_file=configuration.cert_file, - key_file=configuration.key_file, - **addition_pool_args - ) - - def request(self, method, url, query_params=None, headers=None, - body=None, post_params=None, _preload_content=True, - _request_timeout=None): - """Perform requests. - - :param method: http request method - :param url: http request url - :param query_params: query parameters in the url - :param headers: http request headers - :param body: request json body, for `application/json` - :param post_params: request post parameters, - `application/x-www-form-urlencoded` - and `multipart/form-data` - :param _preload_content: if False, the urllib3.HTTPResponse object will - be returned without reading/decoding response - data. Default is True. - :param _request_timeout: timeout setting for this request. If one - number provided, it will be total request - timeout. It can also be a pair (tuple) of - (connection, read) timeouts. 
- """ - method = method.upper() - assert method in ['GET', 'HEAD', 'DELETE', 'POST', 'PUT', - 'PATCH', 'OPTIONS'] - - if post_params and body: - raise ApiValueError( - "body parameter cannot be used with post_params parameter." - ) - - post_params = post_params or {} - headers = headers or {} - - timeout = None - if _request_timeout: - if isinstance(_request_timeout, (int, ) if six.PY3 else (int, long)): # noqa: E501,F821 - timeout = urllib3.Timeout(total=_request_timeout) - elif (isinstance(_request_timeout, tuple) and - len(_request_timeout) == 2): - timeout = urllib3.Timeout( - connect=_request_timeout[0], read=_request_timeout[1]) - - if 'Content-Type' not in headers: - headers['Content-Type'] = 'application/json' - - try: - # For `POST`, `PUT`, `PATCH`, `OPTIONS`, `DELETE` - if method in ['POST', 'PUT', 'PATCH', 'OPTIONS', 'DELETE']: - if query_params: - url += '?' + urlencode(query_params) - if re.search('json', headers['Content-Type'], re.IGNORECASE): - request_body = None - if body is not None: - request_body = json.dumps(body) - r = self.pool_manager.request( - method, url, - body=request_body, - preload_content=_preload_content, - timeout=timeout, - headers=headers) - elif headers['Content-Type'] == 'application/x-www-form-urlencoded': # noqa: E501 - r = self.pool_manager.request( - method, url, - fields=post_params, - encode_multipart=False, - preload_content=_preload_content, - timeout=timeout, - headers=headers) - elif headers['Content-Type'] == 'multipart/form-data': - # must del headers['Content-Type'], or the correct - # Content-Type which generated by urllib3 will be - # overwritten. 
- del headers['Content-Type'] - r = self.pool_manager.request( - method, url, - fields=post_params, - encode_multipart=True, - preload_content=_preload_content, - timeout=timeout, - headers=headers) - # Pass a `string` parameter directly in the body to support - # other content types than Json when `body` argument is - # provided in serialized form - elif isinstance(body, str) or isinstance(body, bytes): - request_body = body - r = self.pool_manager.request( - method, url, - body=request_body, - preload_content=_preload_content, - timeout=timeout, - headers=headers) - else: - # Cannot generate the request from given parameters - msg = """Cannot prepare a request message for provided - arguments. Please check that your arguments match - declared content type.""" - raise ApiException(status=0, reason=msg) - # For `GET`, `HEAD` - else: - r = self.pool_manager.request(method, url, - fields=query_params, - preload_content=_preload_content, - timeout=timeout, - headers=headers) - except urllib3.exceptions.SSLError as e: - msg = "{0}\n{1}".format(type(e).__name__, str(e)) - raise ApiException(status=0, reason=msg) - - if _preload_content: - r = RESTResponse(r) - - # log response body - logger.debug("response body: %s", r.data) - - if not 200 <= r.status <= 299: - raise ApiException(http_resp=r) - - return r - - def GET(self, url, headers=None, query_params=None, _preload_content=True, - _request_timeout=None): - return self.request("GET", url, - headers=headers, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - query_params=query_params) - - def HEAD(self, url, headers=None, query_params=None, _preload_content=True, - _request_timeout=None): - return self.request("HEAD", url, - headers=headers, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - query_params=query_params) - - def OPTIONS(self, url, headers=None, query_params=None, post_params=None, - body=None, _preload_content=True, _request_timeout=None): - return 
self.request("OPTIONS", url, - headers=headers, - query_params=query_params, - post_params=post_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) - - def DELETE(self, url, headers=None, query_params=None, body=None, - _preload_content=True, _request_timeout=None): - return self.request("DELETE", url, - headers=headers, - query_params=query_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) - - def POST(self, url, headers=None, query_params=None, post_params=None, - body=None, _preload_content=True, _request_timeout=None): - return self.request("POST", url, - headers=headers, - query_params=query_params, - post_params=post_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) - - def PUT(self, url, headers=None, query_params=None, post_params=None, - body=None, _preload_content=True, _request_timeout=None): - return self.request("PUT", url, - headers=headers, - query_params=query_params, - post_params=post_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) - - def PATCH(self, url, headers=None, query_params=None, post_params=None, - body=None, _preload_content=True, _request_timeout=None): - return self.request("PATCH", url, - headers=headers, - query_params=query_params, - post_params=post_params, - _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) diff --git a/sdk/python/kubeflow/training/utils/__init__.py b/sdk/python/kubeflow/training/utils/__init__.py deleted file mode 100644 index ede60a09ab..0000000000 --- a/sdk/python/kubeflow/training/utils/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/sdk/python/kubeflow/training/utils/utils.py b/sdk/python/kubeflow/training/utils/utils.py deleted file mode 100644 index 5389f10baf..0000000000 --- a/sdk/python/kubeflow/training/utils/utils.py +++ /dev/null @@ -1,430 +0,0 @@ -# Copyright 2021 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import inspect -import json -import logging -import os -import queue -import textwrap -import threading -from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -from kubeflow.training import models -from kubeflow.training.constants import constants -from kubernetes import config - -logger = logging.getLogger(__name__) - - -class StatusLogger: - """Logger to print Training Job statuses.""" - - def __init__(self, header, column_format): - self.header = header - self.column_format = column_format - self.first_call = True - - def __call__(self, *values): - if self.first_call: - logger.debug(self.header) - self.first_call = False - logger.debug(self.column_format.format(*values)) - - -class FakeResponse: - """Fake object of RESTResponse to deserialize - Ref) https://github.com/kubeflow/katib/pull/1630#discussion_r697877815 - Ref) https://github.com/kubernetes-client/python/issues/977#issuecomment-592030030 - """ - - def __init__(self, obj): - self.data = json.dumps(obj) - - -class SetEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, set): - return list(obj) - if isinstance(obj, type): - return obj.__name__ - return json.JSONEncoder.default(self, obj) - - -def is_running_in_k8s(): - return os.path.isdir("/var/run/secrets/kubernetes.io/") - - -def get_default_target_namespace(): - if not is_running_in_k8s(): - try: - _, current_context = config.list_kube_config_contexts() - return current_context["context"]["namespace"] - except Exception: - return constants.DEFAULT_NAMESPACE - with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r") as f: - return f.readline() - - -def wrap_log_stream(q, stream): - while True: - try: - logline = next(stream) - q.put(logline) - except StopIteration: - q.put(None) - return - - -def get_log_queue_pool(streams): - pool = [] - for stream in streams: - q = queue.Queue(maxsize=100) - pool.append(q) - threading.Thread(target=wrap_log_stream, args=(q, 
stream)).start() - return pool - - -def has_condition(conditions: List[models.V1JobCondition], condition_type: str) -> bool: - """ - Verify if the condition list has the required condition. - Condition should be valid object with `type` and `status`. - """ - - for c in conditions: - if c.type == condition_type and c.status == constants.CONDITION_STATUS_TRUE: - return True - return False - - -def get_script_for_python_packages( - packages_to_install: List[str], pip_index_url: str -) -> str: - """ - Get init script to install Python packages from the given pip index URL. - """ - packages_str = " ".join([str(package) for package in packages_to_install]) - - script_for_python_packages = textwrap.dedent( - f""" - if ! [ -x "$(command -v pip)" ]; then - python -m ensurepip || python -m ensurepip --user || apt-get install python-pip - fi - - PIP_DISABLE_PIP_VERSION_CHECK=1 python -m pip install --quiet \ - --no-warn-script-location --index-url {pip_index_url} {packages_str} - """ - ) - - return script_for_python_packages - - -def get_command_using_train_func( - train_func: Callable, - entrypoint: str, - train_func_parameters: Optional[Dict[str, Any]] = None, - packages_to_install: Optional[List[str]] = None, - pip_index_url: str = constants.DEFAULT_PIP_INDEX_URL, -) -> Tuple[List[str], List[str]]: - """ - Get container args and command from the given training function and parameters. - """ - # Check if function is callable. - if not callable(train_func): - raise ValueError( - f"Training function must be callable, got function type: {type(train_func)}" - ) - - # Extract function implementation. - func_code = inspect.getsource(train_func) - - # Function might be defined in some indented scope (e.g. in another function). - # We need to dedent the function code. - func_code = textwrap.dedent(func_code) - - # Wrap function code to execute it from the file. 
For example: - # def train(parameters): - # print('Start Training...') - # train({'lr': 0.01}) - if train_func_parameters is None: - func_code = f"{func_code}\n{train_func.__name__}()\n" - else: - func_code = f"{func_code}\n{train_func.__name__}({train_func_parameters})\n" - - # Prepare execute script template. - exec_script = textwrap.dedent( - """ - program_path=$(mktemp -d) - read -r -d '' SCRIPT << EOM\n - {func_code} - EOM - printf "%s" \"$SCRIPT\" > \"$program_path/ephemeral_script.py\" - {entrypoint} \"$program_path/ephemeral_script.py\"""" - ) - - # Add function code to the execute script. - exec_script = exec_script.format(func_code=func_code, entrypoint=entrypoint) - - # Install Python packages if that is required. - if packages_to_install is not None: - exec_script = ( - get_script_for_python_packages(packages_to_install, pip_index_url) - + exec_script - ) - - # Return container command and args to execute training function. - return constants.DEFAULT_COMMAND, [exec_script] - - -def get_container_spec( - name: str, - base_image: str, - command: Optional[List[str]] = None, - args: Optional[List[str]] = None, - resources: Union[dict, models.V1ResourceRequirements, None] = None, - volume_mounts: Optional[List[models.V1VolumeMount]] = None, - env_vars: Optional[ - Union[Dict[str, str], List[Union[models.V1EnvVar, models.V1EnvVar]]] - ] = None, -) -> models.V1Container: - """ - Get container spec for the given parameters. - """ - - if name is None or base_image is None: - raise ValueError("Container name or base image cannot be none") - - # Handle env_vars as either a dict or a list - if env_vars: - if isinstance(env_vars, dict): - env_vars = [models.V1EnvVar(name=k, value=v) for k, v in env_vars.items()] - elif isinstance(env_vars, list): - env_vars = [ - v if isinstance(v, models.V1EnvVar) else models.V1EnvVar(**v) - for v in env_vars - ] - - # Create initial container spec. 
- container_spec = models.V1Container( - name=name, - image=base_image, - command=command, - args=args, - volume_mounts=volume_mounts, - env=env_vars, - ) - - # Convert dict to the Kubernetes container resources if that is required. - if isinstance(resources, dict): - # Convert all keys in resources to lowercase. - resources = {k.lower(): v for k, v in resources.items()} - if "gpu" in resources: - resources["nvidia.com/gpu"] = resources.pop("gpu") - - resources = models.V1ResourceRequirements( - requests=resources, - limits=resources, - ) - - # Add resources to the container spec. - container_spec.resources = resources - - return container_spec - - -def get_pod_template_spec( - containers: List[models.V1Container], - init_containers: Optional[List[models.V1Container]] = None, - volumes: Optional[List[models.V1Volume]] = None, -) -> models.V1PodTemplateSpec: - """ - Get Pod template spec for the given parameters. - """ - - # Create Pod template spec. If the value is None, Pod doesn't have that parameter - pod_template_spec = models.V1PodTemplateSpec( - metadata=models.V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=models.V1PodSpec( - init_containers=init_containers, - containers=containers, - volumes=volumes, - ), - ) - - return pod_template_spec - - -def get_tfjob_template( - name: str, - namespace: str, - pod_template_spec: models.V1PodTemplateSpec, - num_workers: int, - num_chief_replicas: Optional[int] = None, - num_ps_replicas: Optional[int] = None, -): - - # Create TFJob template. - tfjob = models.KubeflowOrgV1TFJob( - api_version=constants.API_VERSION, - kind=constants.TFJOB_KIND, - metadata=models.V1ObjectMeta(name=name, namespace=namespace), - spec=models.KubeflowOrgV1TFJobSpec( - run_policy=models.KubeflowOrgV1RunPolicy(clean_pod_policy=None), - tf_replica_specs={}, - ), - ) - - # Add Chief, PS, and Worker replicas to the TFJob. 
- if num_chief_replicas is not None: - tfjob.spec.tf_replica_specs[constants.REPLICA_TYPE_CHIEF] = ( - models.KubeflowOrgV1ReplicaSpec( - replicas=num_chief_replicas, - template=pod_template_spec, - ) - ) - - if num_ps_replicas is not None: - tfjob.spec.tf_replica_specs[constants.REPLICA_TYPE_PS] = ( - models.KubeflowOrgV1ReplicaSpec( - replicas=num_ps_replicas, - template=pod_template_spec, - ) - ) - - if num_workers is not None: - tfjob.spec.tf_replica_specs[constants.REPLICA_TYPE_WORKER] = ( - models.KubeflowOrgV1ReplicaSpec( - replicas=num_workers, - template=pod_template_spec, - ) - ) - - return tfjob - - -def get_pytorchjob_template( - name: str, - namespace: str, - num_workers: int, - worker_pod_template_spec: Optional[models.V1PodTemplateSpec], - master_pod_template_spec: Optional[models.V1PodTemplateSpec] = None, - num_procs_per_worker: Optional[Union[int, str]] = None, -): - - # Create PyTorchJob template. - pytorchjob = models.KubeflowOrgV1PyTorchJob( - api_version=constants.API_VERSION, - kind=constants.PYTORCHJOB_KIND, - metadata=models.V1ObjectMeta(name=name, namespace=namespace), - spec=models.KubeflowOrgV1PyTorchJobSpec( - run_policy=models.KubeflowOrgV1RunPolicy(clean_pod_policy=None), - pytorch_replica_specs={}, - ), - ) - - if num_procs_per_worker: - pytorchjob.spec.nproc_per_node = str(num_procs_per_worker) - - # Create Master replica if that is set. - if master_pod_template_spec: - pytorchjob.spec.pytorch_replica_specs[constants.REPLICA_TYPE_MASTER] = ( - models.KubeflowOrgV1ReplicaSpec( - replicas=1, - template=master_pod_template_spec, - ) - ) - # If we don't define Master template, use the Worker template. - else: - pytorchjob.spec.pytorch_replica_specs[constants.REPLICA_TYPE_MASTER] = ( - models.KubeflowOrgV1ReplicaSpec( - replicas=1, - template=worker_pod_template_spec, - ) - ) - - # Create Worker with num_workers - 1 replicas. 
- # TODO (andreyvelich): Investigate if we can run PyTorchJob without the Master - # Currently, if Master is not set, Training Operator controller - # doesn't set RANK and WORLD_SIZE for PyTorchJob. - # Ref issue: https://github.com/kubeflow/training-operator/issues/1991 - if num_workers > 1: - pytorchjob.spec.pytorch_replica_specs[constants.REPLICA_TYPE_WORKER] = ( - models.KubeflowOrgV1ReplicaSpec( - replicas=num_workers - 1, - template=worker_pod_template_spec, - ) - ) - - return pytorchjob - - -def get_pvc_spec( - pvc_name: str, - namespace: str, - storage_config: Dict[str, Optional[Union[str, List[str]]]], -): - if pvc_name is None or namespace is None: - raise ValueError("One of the required storage config argument is None") - - if "size" not in storage_config: - storage_config["size"] = constants.PVC_DEFAULT_SIZE - - if "access_modes" not in storage_config: - storage_config["access_modes"] = constants.PVC_DEFAULT_ACCESS_MODES - - pvc_spec = models.V1PersistentVolumeClaim( - api_version="v1", - kind="PersistentVolumeClaim", - metadata={"name": pvc_name, "namespace": namespace}, - spec=models.V1PersistentVolumeClaimSpec( - access_modes=storage_config["access_modes"], - resources=models.V1ResourceRequirements( - requests={"storage": storage_config["size"]} - ), - ), - ) - - if "storage_class" in storage_config: - pvc_spec.spec.storage_class_name = storage_config["storage_class"] - - return pvc_spec - - -def add_event_to_dict( - events_dict: Dict[str, List[str]], - event: models.CoreV1Event, - object_kind: str, - object_name: str, - object_creation_timestamp: datetime, -): - """Add Kubernetes event to the dict with this format: - ``` - {"/": " "} - ``` - """ - if ( - event.involved_object.kind == object_kind - and event.involved_object.name == object_name - and event.metadata.creation_timestamp >= object_creation_timestamp - ): - event_key = f"{object_kind.lower()}/{object_name}" - event_time = event.metadata.creation_timestamp.strftime("%Y-%m-%d %H:%M:%S") - 
event_msg = f"{event_time} {event.message}" - if event_key not in events_dict: - events_dict[event_key] = [event_msg] - else: - events_dict[event_key] += [event_msg] diff --git a/sdk/python/setup.py b/sdk/python/setup.py deleted file mode 100644 index 14295d72de..0000000000 --- a/sdk/python/setup.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import setuptools - -TESTS_REQUIRES = [ - "pytest", - "pytest-tornasync", - "mypy", - "black==24.3.0", - "flake8==4.0.1", -] - -REQUIRES = [ - "certifi>=14.05.14", - "six>=1.10", - "setuptools>=21.0.0", - "urllib3>=1.15.1", - "kubernetes>=27.2.0", - "retrying>=1.3.3", -] - -setuptools.setup( - name="kubeflow-training", - version="1.8.1", - author="Kubeflow Authors", - author_email="hejinchi@cn.ibm.com", - license="Apache License Version 2.0", - url="https://github.com/kubeflow/training-operator/tree/master/sdk/python", - description="Training Operator Python SDK", - long_description="Training Operator Python SDK", - packages=setuptools.find_packages(include=("kubeflow*")), - package_data={}, - include_package_data=False, - zip_safe=False, - classifiers=[ - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Science/Research", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "License :: OSI 
Approved :: Apache Software License", - "Operating System :: OS Independent", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", - "Topic :: Software Development :: Libraries :: Python Modules", - ], - install_requires=REQUIRES, - tests_require=TESTS_REQUIRES, - extras_require={ - "test": TESTS_REQUIRES, - "huggingface": ["transformers==4.38.0", "peft==0.3.0"], - }, -) diff --git a/sdk/python/test/__init__.py b/sdk/python/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py deleted file mode 100644 index 756906f27f..0000000000 --- a/sdk/python/test/conftest.py +++ /dev/null @@ -1,10 +0,0 @@ -import pytest - - -def pytest_addoption(parser): - parser.addoption("--namespace", action="store", default="default") - - -@pytest.fixture -def job_namespace(request): - return request.config.getoption("--namespace") diff --git a/sdk/python/test/e2e-fine-tune-llm/test_e2e_pytorch_fine_tune_llm.py b/sdk/python/test/e2e-fine-tune-llm/test_e2e_pytorch_fine_tune_llm.py deleted file mode 100644 index 9d6f1c48bf..0000000000 --- a/sdk/python/test/e2e-fine-tune-llm/test_e2e_pytorch_fine_tune_llm.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2024 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging - -import transformers -from kubeflow.storage_initializer.hugging_face import ( - HuggingFaceDatasetParams, - HuggingFaceModelParams, - HuggingFaceTrainerParams, -) -from kubeflow.training import TrainingClient, constants -from peft import LoraConfig - -import test.e2e.utils as utils - -logging.basicConfig(format="%(message)s") -logging.getLogger("kubeflow.training.api.training_client").setLevel(logging.DEBUG) - -TRAINING_CLIENT = TrainingClient(job_kind=constants.PYTORCHJOB_KIND) - - -def test_sdk_e2e_create_from_train_api(job_namespace="default"): - JOB_NAME = "pytorchjob-from-train-api" - - # Use test case from fine-tuning API tutorial. - # https://www.kubeflow.org/docs/components/training/user-guides/fine-tuning/ - TRAINING_CLIENT.train( - name=JOB_NAME, - namespace=job_namespace, - # BERT model URI and type of Transformer to train it. - model_provider_parameters=HuggingFaceModelParams( - model_uri="hf://google-bert/bert-base-cased", - transformer_type=transformers.AutoModelForSequenceClassification, - num_labels=5, - ), - # In order to save test time, use 8 samples from Yelp dataset. - dataset_provider_parameters=HuggingFaceDatasetParams( - repo_id="yelp_review_full", - split="train[:8]", - ), - # Specify HuggingFace Trainer parameters. - trainer_parameters=HuggingFaceTrainerParams( - training_parameters=transformers.TrainingArguments( - output_dir="test_trainer", - save_strategy="no", - evaluation_strategy="no", - do_eval=False, - disable_tqdm=True, - log_level="info", - num_train_epochs=1, - ), - # Set LoRA config to reduce number of trainable parameters. 
- lora_config=LoraConfig( - r=8, - lora_alpha=8, - lora_dropout=0.1, - bias="none", - ), - ), - num_workers=1, - num_procs_per_worker=1, - resources_per_worker={ - "gpu": 0, - "cpu": 2, - "memory": "10G", - }, - storage_config={ - "size": "10Gi", - "access_modes": ["ReadWriteOnce"], - }, - ) - - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PyTorchJob create from API E2E fails. Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) diff --git a/sdk/python/test/e2e/__init__.py b/sdk/python/test/e2e/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sdk/python/test/e2e/constants.py b/sdk/python/test/e2e/constants.py deleted file mode 100644 index 04be27836f..0000000000 --- a/sdk/python/test/e2e/constants.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2023 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -TEST_GANG_SCHEDULER_NAME_ENV_KEY = "GANG_SCHEDULER_NAME" -TEST_GANG_SCHEDULER_NAME_SCHEDULER_PLUGINS = "scheduler-plugins" -TEST_GANG_SCHEDULER_NAME_VOLCANO = "volcano" -TEST_GANG_SCHEDULER_NAME_NONE = "none" - -GANG_SCHEDULERS = { - TEST_GANG_SCHEDULER_NAME_SCHEDULER_PLUGINS, - TEST_GANG_SCHEDULER_NAME_VOLCANO, -} -NONE_GANG_SCHEDULERS = {TEST_GANG_SCHEDULER_NAME_NONE, ""} - -DEFAULT_SCHEDULER_PLUGINS_NAME = "scheduler-plugins-scheduler" diff --git a/sdk/python/test/e2e/test_e2e_jaxjob.py b/sdk/python/test/e2e/test_e2e_jaxjob.py deleted file mode 100644 index 7471f67338..0000000000 --- a/sdk/python/test/e2e/test_e2e_jaxjob.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright 2024 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import logging -import pytest -from typing import Optional - -from kubernetes.client import V1PodTemplateSpec -from kubernetes.client import V1ObjectMeta -from kubernetes.client import V1PodSpec -from kubernetes.client import V1Container -from kubernetes.client import V1ResourceRequirements - -from kubeflow.training import TrainingClient -from kubeflow.training import KubeflowOrgV1ReplicaSpec -from kubeflow.training import KubeflowOrgV1JAXJob -from kubeflow.training import KubeflowOrgV1JAXJobSpec -from kubeflow.training import KubeflowOrgV1RunPolicy -from kubeflow.training import KubeflowOrgV1SchedulingPolicy -from kubeflow.training.constants import constants - -import test.e2e.utils as utils -from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY -from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS - -logging.basicConfig(format="%(message)s") -logging.getLogger("kubeflow.training.api.training_client").setLevel(logging.DEBUG) - -TRAINING_CLIENT = TrainingClient(job_kind=constants.JAXJOB_KIND) -JOB_NAME = "jaxjob-cpu-ci-test" -CONTAINER_NAME = "jax" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, - reason="For gang-scheduling", -) -def test_sdk_e2e_with_gang_scheduling(job_namespace): - container = generate_container() - - worker = KubeflowOrgV1ReplicaSpec( - replicas=2, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec( - scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - containers=[container], - ), - ), - ) - - unschedulable_jaxjob = generate_jaxjob( - job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=10) - ) - schedulable_jaxjob = generate_jaxjob( - job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=2) - ) - - 
TRAINING_CLIENT.create_job(job=unschedulable_jaxjob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"JAXJob E2E fails. Exception: {e}") - - TRAINING_CLIENT.update_job(schedulable_jaxjob, JOB_NAME, job_namespace) - logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"JAXJob E2E fails. Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, - reason="For plain scheduling", -) -def test_sdk_e2e(job_namespace): - container = generate_container() - - worker = KubeflowOrgV1ReplicaSpec( - replicas=2, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - jaxjob = generate_jaxjob(job_namespace, worker) - - TRAINING_CLIENT.create_job(job=jaxjob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, 
job_namespace) - raise Exception(f"JAXJob E2E fails. Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -def generate_jaxjob( - job_namespace: str, - worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, -) -> KubeflowOrgV1JAXJob: - return KubeflowOrgV1JAXJob( - api_version=constants.API_VERSION, - kind=constants.JAXJOB_KIND, - metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), - spec=KubeflowOrgV1JAXJobSpec( - run_policy=KubeflowOrgV1RunPolicy( - scheduling_policy=scheduling_policy, - clean_pod_policy="None", - ), - jax_replica_specs={"Worker": worker}, - ), - ) - - -def generate_container() -> V1Container: - return V1Container( - name=CONTAINER_NAME, - image=os.getenv("JAX_JOB_IMAGE", "docker.io/kubeflow/jaxjob-dist-spmd-mnist:latest"), - resources=V1ResourceRequirements(limits={"memory": "3Gi", "cpu": "1.2"}), - ) diff --git a/sdk/python/test/e2e/test_e2e_mpijob.py b/sdk/python/test/e2e/test_e2e_mpijob.py deleted file mode 100644 index cee0e136ae..0000000000 --- a/sdk/python/test/e2e/test_e2e_mpijob.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright 2021 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import logging -import pytest -from typing import Tuple, Optional - -from kubernetes.client import V1PodTemplateSpec -from kubernetes.client import V1ObjectMeta -from kubernetes.client import V1PodSpec -from kubernetes.client import V1Container -from kubernetes.client import V1ResourceRequirements - -from kubeflow.training import TrainingClient -from kubeflow.training import KubeflowOrgV1ReplicaSpec -from kubeflow.training import KubeflowOrgV1MPIJob -from kubeflow.training import KubeflowOrgV1MPIJobSpec -from kubeflow.training import KubeflowOrgV1RunPolicy -from kubeflow.training import KubeflowOrgV1SchedulingPolicy -from kubeflow.training.constants import constants - -import test.e2e.utils as utils -from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY -from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS - -logging.basicConfig(format="%(message)s") -logging.getLogger("kubeflow.training.api.training_client").setLevel(logging.DEBUG) - -TRAINING_CLIENT = TrainingClient(job_kind=constants.MPIJOB_KIND) -JOB_NAME = "mpijob-pytorch-ci-test" -CONTAINER_NAME = "mpi" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, - reason="For gang-scheduling", -) -def test_sdk_e2e_with_gang_scheduling(job_namespace): - launcher_container, worker_container = generate_containers() - - launcher = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="Never", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec( - containers=[launcher_container], - scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ), - ), - ) - - worker = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="Never", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec( - 
containers=[worker_container], - scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ), - ), - ) - - mpijob = generate_mpijob( - job_namespace, launcher, worker, KubeflowOrgV1SchedulingPolicy(min_available=10) - ) - patched_mpijob = generate_mpijob( - job_namespace, launcher, worker, KubeflowOrgV1SchedulingPolicy(min_available=2) - ) - - TRAINING_CLIENT.create_job(job=mpijob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"MPIJob E2E fails. Exception: {e}") - - TRAINING_CLIENT.update_job(patched_mpijob, JOB_NAME, job_namespace) - logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"MPIJob E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, - reason="For plain scheduling", -) -def test_sdk_e2e(job_namespace): - launcher_container, worker_container = generate_containers() - - launcher = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="Never", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[launcher_container]), - ), - ) - - worker = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="Never", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[worker_container]), - ), - ) - - mpijob = generate_mpijob(job_namespace, launcher, worker) - - TRAINING_CLIENT.create_job(job=mpijob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"MPIJob E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -def generate_mpijob( - job_namespace: str, - launcher: KubeflowOrgV1ReplicaSpec, - worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, -) -> KubeflowOrgV1MPIJob: - return KubeflowOrgV1MPIJob( - api_version=constants.API_VERSION, - kind=constants.MPIJOB_KIND, - metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), - spec=KubeflowOrgV1MPIJobSpec( - slots_per_worker=1, - run_policy=KubeflowOrgV1RunPolicy( - clean_pod_policy="None", - scheduling_policy=scheduling_policy, - ), - mpi_replica_specs={"Launcher": launcher, "Worker": worker}, - ), - ) - - -def generate_containers() -> Tuple[V1Container, V1Container]: - launcher_container = V1Container( - name=CONTAINER_NAME, - image="horovod/horovod:0.28.1", - command=["mpirun"], - args=[ - "-np", - "1", - "--allow-run-as-root", - "-bind-to", - "none", - "-map-by", - "slot", - "-x", - "LD_LIBRARY_PATH", - "-x", - "PATH", - "-mca", - "pml", - "ob1", - "-mca", - "btl", - "^openib", - "python", - "/horovod/examples/pytorch/pytorch_mnist.py", - "--epochs", - "1", - ], - resources=V1ResourceRequirements(limits={"memory": "1Gi", "cpu": "0.4"}), - ) - - worker_container = V1Container( - name=CONTAINER_NAME, - image="horovod/horovod:0.28.1", - resources=V1ResourceRequirements(limits={"memory": "3Gi", "cpu": "1.2"}), - ) - - return launcher_container, worker_container diff --git a/sdk/python/test/e2e/test_e2e_paddlejob.py b/sdk/python/test/e2e/test_e2e_paddlejob.py deleted file mode 100644 index 5ebb894744..0000000000 --- a/sdk/python/test/e2e/test_e2e_paddlejob.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright 2022 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import logging -import pytest -from typing import Optional - -from kubernetes.client import V1PodTemplateSpec -from kubernetes.client import V1ObjectMeta -from kubernetes.client import V1PodSpec -from kubernetes.client import V1Container -from kubernetes.client import V1ResourceRequirements - -from kubeflow.training import TrainingClient -from kubeflow.training import KubeflowOrgV1ReplicaSpec -from kubeflow.training import KubeflowOrgV1PaddleJob -from kubeflow.training import KubeflowOrgV1PaddleJobSpec -from kubeflow.training import KubeflowOrgV1RunPolicy -from kubeflow.training import KubeflowOrgV1SchedulingPolicy -from kubeflow.training.constants import constants - -import test.e2e.utils as utils -from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY -from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS - -logging.basicConfig(format="%(message)s") -logging.getLogger("kubeflow.training.api.training_client").setLevel(logging.DEBUG) - -TRAINING_CLIENT = TrainingClient(job_kind=constants.PADDLEJOB_KIND) -JOB_NAME = "paddlejob-cpu-ci-test" -CONTAINER_NAME = "paddle" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, - reason="For gang-scheduling", -) -def test_sdk_e2e_with_gang_scheduling(job_namespace): - container = generate_container() - - worker = KubeflowOrgV1ReplicaSpec( - replicas=2, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: 
"false"} - ), - spec=V1PodSpec( - scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - containers=[container], - ), - ), - ) - - unschedulable_paddlejob = generate_paddlejob( - job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=10) - ) - schedulable_paddlejob = generate_paddlejob( - job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=2) - ) - - TRAINING_CLIENT.create_job(job=unschedulable_paddlejob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PaddleJob E2E fails. Exception: {e}") - - TRAINING_CLIENT.update_job(schedulable_paddlejob, JOB_NAME, job_namespace) - logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PaddleJob E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, - reason="For plain scheduling", -) -def test_sdk_e2e(job_namespace): - container = generate_container() - - worker = KubeflowOrgV1ReplicaSpec( - replicas=2, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - paddlejob = generate_paddlejob(job_namespace, worker) - - TRAINING_CLIENT.create_job(job=paddlejob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PaddleJob E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -def generate_paddlejob( - job_namespace: str, - worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, -) -> KubeflowOrgV1PaddleJob: - return KubeflowOrgV1PaddleJob( - api_version=constants.API_VERSION, - kind=constants.PADDLEJOB_KIND, - metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), - spec=KubeflowOrgV1PaddleJobSpec( - run_policy=KubeflowOrgV1RunPolicy( - scheduling_policy=scheduling_policy, - clean_pod_policy="None", - ), - paddle_replica_specs={"Worker": worker}, - ), - ) - - -def generate_container() -> V1Container: - return V1Container( - name=CONTAINER_NAME, - image="docker.io/paddlepaddle/paddle:2.4.0rc0-cpu", - command=["python"], - args=["-m", "paddle.distributed.launch", "run_check"], - resources=V1ResourceRequirements(limits={"memory": "2Gi", "cpu": "0.8"}), - ) diff --git a/sdk/python/test/e2e/test_e2e_pytorchjob.py b/sdk/python/test/e2e/test_e2e_pytorchjob.py deleted file mode 100644 index 5800c0f76f..0000000000 --- a/sdk/python/test/e2e/test_e2e_pytorchjob.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright 2021 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import logging -import pytest -from typing import Optional - -from kubernetes.client import V1PodTemplateSpec -from kubernetes.client import V1ObjectMeta -from kubernetes.client import V1PodSpec -from kubernetes.client import V1Container -from kubernetes.client import V1ResourceRequirements - -from kubeflow.training import TrainingClient -from kubeflow.training import KubeflowOrgV1ReplicaSpec -from kubeflow.training import KubeflowOrgV1PyTorchJob -from kubeflow.training import KubeflowOrgV1PyTorchJobSpec -from kubeflow.training import KubeflowOrgV1RunPolicy -from kubeflow.training import KubeflowOrgV1SchedulingPolicy -from kubeflow.training import constants - -import test.e2e.utils as utils -from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY -from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS - -logging.basicConfig(format="%(message)s") -logging.getLogger("kubeflow.training.api.training_client").setLevel(logging.DEBUG) - -TRAINING_CLIENT = TrainingClient(job_kind=constants.PYTORCHJOB_KIND) -CONTAINER_NAME = "pytorch" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, - reason="For gang-scheduling", -) -def test_sdk_e2e_with_gang_scheduling(job_namespace): - JOB_NAME = "pytorchjob-gang-scheduling" - container = generate_container() - - master = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec( - scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - containers=[container], - ), - ), - ) - - worker = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec( - 
scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - containers=[container], - ), - ), - ) - - unschedulable_pytorchjob = generate_pytorchjob( - job_namespace, - JOB_NAME, - master, - worker, - KubeflowOrgV1SchedulingPolicy(min_available=10), - ) - schedulable_pytorchjob = generate_pytorchjob( - job_namespace, - JOB_NAME, - master, - worker, - KubeflowOrgV1SchedulingPolicy(min_available=2), - ) - - TRAINING_CLIENT.create_job(job=unschedulable_pytorchjob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PyTorchJob E2E fails. Exception: {e}") - - TRAINING_CLIENT.update_job(schedulable_pytorchjob, JOB_NAME, job_namespace) - logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PyTorchJob E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, - reason="For plain scheduling", -) -def test_sdk_e2e(job_namespace): - JOB_NAME = "pytorchjob-e2e" - container = generate_container() - - master = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - worker = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - pytorchjob = generate_pytorchjob(job_namespace, JOB_NAME, master, worker) - - TRAINING_CLIENT.create_job(job=pytorchjob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PyTorchJob E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, - reason="For plain scheduling", -) -def test_sdk_e2e_managed_by(job_namespace): - JOB_NAME = "pytorchjob-e2e" - container = generate_container() - - master = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - worker = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - #1. Job created with default value: 'kubeflow.org/training-operator' - job created and status updated - #2. Job created with kueue value: 'kueue.x-k8s.io/multikueue' - job created but status not updated - #3. Job created with invalid value (not acceptable by the webhook) - job not created - controllers = { - JOB_NAME+"-default-controller": 'kubeflow.org/training-operator', - JOB_NAME+"-multikueue-controller": 'kueue.x-k8s.io/multikueue', - JOB_NAME+"-invalid-controller": 'kueue.x-k8s.io/other-controller', - } - for job_name, managed_by in controllers.items(): - pytorchjob = generate_pytorchjob(job_namespace, job_name, master, worker, managed_by=managed_by) - try: - TRAINING_CLIENT.create_job(job=pytorchjob, namespace=job_namespace) - except Exception as e: - if "invalid" in str(job_name): - error_message = f"Failed to create PyTorchJob: {job_namespace}/{job_name}" - assert error_message in str(e), f"Unexpected error: {e}" - else: - raise Exception(f"PyTorchJob E2E fails. 
Exception: {e}") - - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - jobs = TRAINING_CLIENT.list_jobs(job_namespace) - logging.info(jobs) - - try: - #Only jobs with valid controllers should be created, 2 out of 3 satisfy this condition: 'kubeflow.org/training-operator' and 'kueue.x-k8s.io/multikueue' - if len(jobs) != 2: - raise Exception(f"Too many PyTorchJobs created {jobs}") - - for job in jobs: - if job._metadata.name == 'kubeflow.org/training-operator': - utils.verify_job_e2e(TRAINING_CLIENT, job._metadata.name, job_namespace, wait_timeout=900) - if job._metadata.name == 'kueue.x-k8s.io/multikueue': - conditions = TRAINING_CLIENT.get_job_conditions(job._metadata.name, job_namespace, TRAINING_CLIENT.job_kind, job) - if len(conditions) != 0: - raise Exception(f"{TRAINING_CLIENT.job_kind} conditions {conditions} should not be updated, externally managed by {managed_by}") - - - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PyTorchJob E2E fails. 
Exception: {e}") - - for job in jobs: - utils.print_job_results(TRAINING_CLIENT, job._metadata.name, job_namespace) - TRAINING_CLIENT.delete_job(job._metadata.name, job_namespace) - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, - reason="For plain scheduling", -) -def test_sdk_e2e_create_from_func(job_namespace): - JOB_NAME = "pytorchjob-from-func" - - def train_func(): - import time - - for i in range(10): - print(f"Start training for Epoch {i}") - time.sleep(1) - - num_workers = 3 - - TRAINING_CLIENT.create_job( - name=JOB_NAME, - namespace=job_namespace, - train_func=train_func, - num_workers=num_workers, - ) - - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PyTorchJob create from function E2E fails. Exception: {e}") - - # Verify that PyTorchJob has correct pods. 
- pod_names = TRAINING_CLIENT.get_job_pod_names( - name=JOB_NAME, namespace=job_namespace - ) - - if len(pod_names) != num_workers or f"{JOB_NAME}-worker-0" not in pod_names: - raise Exception(f"PyTorchJob has incorrect pods: {pod_names}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, - reason="For plain scheduling", -) -def test_sdk_e2e_create_from_image(job_namespace): - JOB_NAME = "pytorchjob-from-image" - - TRAINING_CLIENT.create_job( - name=JOB_NAME, - namespace=job_namespace, - base_image="docker.io/hello-world", - num_workers=1, - ) - - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"PyTorchJob create from function E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -def generate_pytorchjob( - job_namespace: str, - job_name: str, - master: KubeflowOrgV1ReplicaSpec, - worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, - managed_by: Optional[str] = None, -) -> KubeflowOrgV1PyTorchJob: - return KubeflowOrgV1PyTorchJob( - api_version=constants.API_VERSION, - kind=constants.PYTORCHJOB_KIND, - metadata=V1ObjectMeta(name=job_name, namespace=job_namespace), - spec=KubeflowOrgV1PyTorchJobSpec( - run_policy=KubeflowOrgV1RunPolicy( - clean_pod_policy="None", - scheduling_policy=scheduling_policy, - managed_by=managed_by, - ), - pytorch_replica_specs={"Master": master, "Worker": worker}, - ), - ) - - -def generate_container() -> V1Container: - return V1Container( - name=CONTAINER_NAME, - image="kubeflow/pytorch-dist-mnist:latest", - args=["--backend", "gloo", "--epochs", "1"], - resources=V1ResourceRequirements(limits={"memory": "2Gi", "cpu": "0.8"}), - ) diff --git a/sdk/python/test/e2e/test_e2e_tfjob.py b/sdk/python/test/e2e/test_e2e_tfjob.py deleted file mode 100644 index b955074bc3..0000000000 --- a/sdk/python/test/e2e/test_e2e_tfjob.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2021 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import logging -import pytest -from typing import Optional - -from kubernetes.client import V1PodTemplateSpec -from kubernetes.client import V1ObjectMeta -from kubernetes.client import V1PodSpec -from kubernetes.client import V1Container -from kubernetes.client import V1ResourceRequirements - -from kubeflow.training import TrainingClient -from kubeflow.training import KubeflowOrgV1ReplicaSpec -from kubeflow.training import KubeflowOrgV1RunPolicy -from kubeflow.training import KubeflowOrgV1TFJob -from kubeflow.training import KubeflowOrgV1TFJobSpec -from kubeflow.training import KubeflowOrgV1SchedulingPolicy -from kubeflow.training.constants import constants - -import test.e2e.utils as utils -from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY -from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS - -logging.basicConfig(format="%(message)s") -logging.getLogger("kubeflow.training.api.training_client").setLevel(logging.DEBUG) - - -TRAINING_CLIENT = TrainingClient(job_kind=constants.TFJOB_KIND) -JOB_NAME = "tfjob-mnist-ci-test" -CONTAINER_NAME = "tensorflow" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, - reason="For gang-scheduling", -) -def test_sdk_e2e_with_gang_scheduling(job_namespace): - container = generate_container() - - worker = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="Never", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec( - containers=[container], - scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ), - ), - ) - - unschedulable_tfjob = generate_tfjob( - job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=10) - ) - schedulable_tfjob = generate_tfjob( - job_namespace, worker, KubeflowOrgV1SchedulingPolicy(min_available=1) - ) - - TRAINING_CLIENT.create_job(job=unschedulable_tfjob, 
namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"TFJob E2E fails. Exception: {e}") - - TRAINING_CLIENT.update_job(schedulable_tfjob, JOB_NAME, job_namespace) - logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"TFJob E2E fails. Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, - reason="For plain scheduling", -) -def test_sdk_e2e(job_namespace): - container = generate_container() - - worker = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="Never", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - tfjob = generate_tfjob(job_namespace, worker) - - TRAINING_CLIENT.create_job(job=tfjob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"TFJob E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -def generate_tfjob( - job_namespace: str, - worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, -) -> KubeflowOrgV1TFJob: - return KubeflowOrgV1TFJob( - api_version=constants.API_VERSION, - kind=constants.TFJOB_KIND, - metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), - spec=KubeflowOrgV1TFJobSpec( - run_policy=KubeflowOrgV1RunPolicy( - clean_pod_policy="None", - scheduling_policy=scheduling_policy, - ), - tf_replica_specs={"Worker": worker}, - ), - ) - - -def generate_container() -> V1Container: - return V1Container( - name=CONTAINER_NAME, - image="gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0", - command=[ - "python", - "/var/tf_mnist/mnist_with_summaries.py", - "--log_dir=/train/logs", - "--learning_rate=0.01", - "--batch_size=150", - ], - resources=V1ResourceRequirements(limits={"memory": "4Gi", "cpu": "1.6"}), - ) diff --git a/sdk/python/test/e2e/test_e2e_xgboostjob.py b/sdk/python/test/e2e/test_e2e_xgboostjob.py deleted file mode 100644 index c586997f54..0000000000 --- a/sdk/python/test/e2e/test_e2e_xgboostjob.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright 2021 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import logging -import pytest -from typing import Optional - -from kubernetes.client import V1PodTemplateSpec -from kubernetes.client import V1ObjectMeta -from kubernetes.client import V1PodSpec -from kubernetes.client import V1Container -from kubernetes.client import V1ResourceRequirements - -from kubeflow.training import TrainingClient -from kubeflow.training import KubeflowOrgV1ReplicaSpec -from kubeflow.training import KubeflowOrgV1XGBoostJob -from kubeflow.training import KubeflowOrgV1XGBoostJobSpec -from kubeflow.training import KubeflowOrgV1RunPolicy -from kubeflow.training import KubeflowOrgV1SchedulingPolicy -from kubeflow.training.constants import constants - -import test.e2e.utils as utils -from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_ENV_KEY -from test.e2e.constants import GANG_SCHEDULERS, NONE_GANG_SCHEDULERS - -logging.basicConfig(format="%(message)s") -logging.getLogger("kubeflow.training.api.training_client").setLevel(logging.DEBUG) - -TRAINING_CLIENT = TrainingClient(job_kind=constants.XGBOOSTJOB_KIND) -JOB_NAME = "xgboostjob-iris-ci-test" -CONTAINER_NAME = "xgboost" -GANG_SCHEDULER_NAME = os.getenv(TEST_GANG_SCHEDULER_NAME_ENV_KEY, "") - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in NONE_GANG_SCHEDULERS, - reason="For gang-scheduling", -) -def test_sdk_e2e_with_gang_scheduling(job_namespace): - container = generate_container() - - master = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec( - containers=[container], - scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ), - ), - ) - - worker = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec( - containers=[container], - 
scheduler_name=utils.get_pod_spec_scheduler_name(GANG_SCHEDULER_NAME), - ), - ), - ) - - unschedulable_xgboostjob = generate_xgboostjob( - job_namespace, master, worker, KubeflowOrgV1SchedulingPolicy(min_available=10) - ) - schedulable_xgboostjob = generate_xgboostjob( - job_namespace, master, worker, KubeflowOrgV1SchedulingPolicy(min_available=2) - ) - - TRAINING_CLIENT.create_job(job=unschedulable_xgboostjob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_unschedulable_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"XGBoostJob E2E fails. Exception: {e}") - - TRAINING_CLIENT.update_job(schedulable_xgboostjob, JOB_NAME, job_namespace) - logging.info(f"List of updated {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"XGBoostJob E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -@pytest.mark.skipif( - GANG_SCHEDULER_NAME in GANG_SCHEDULERS, - reason="For plain scheduling", -) -def test_sdk_e2e(job_namespace): - container = generate_container() - - master = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - worker = KubeflowOrgV1ReplicaSpec( - replicas=1, - restart_policy="OnFailure", - template=V1PodTemplateSpec( - metadata=V1ObjectMeta( - annotations={constants.ISTIO_SIDECAR_INJECTION: "false"} - ), - spec=V1PodSpec(containers=[container]), - ), - ) - - xgboostjob = generate_xgboostjob(job_namespace, master, worker) - - TRAINING_CLIENT.create_job(job=xgboostjob, namespace=job_namespace) - logging.info(f"List of created {TRAINING_CLIENT.job_kind}s") - logging.info(TRAINING_CLIENT.list_jobs(job_namespace)) - - try: - utils.verify_job_e2e(TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=900) - except Exception as e: - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - raise Exception(f"XGBoostJob E2E fails. 
Exception: {e}") - - utils.print_job_results(TRAINING_CLIENT, JOB_NAME, job_namespace) - TRAINING_CLIENT.delete_job(JOB_NAME, job_namespace) - - -def generate_xgboostjob( - job_namespace: str, - master: KubeflowOrgV1ReplicaSpec, - worker: KubeflowOrgV1ReplicaSpec, - scheduling_policy: Optional[KubeflowOrgV1SchedulingPolicy] = None, -) -> KubeflowOrgV1XGBoostJob: - return KubeflowOrgV1XGBoostJob( - api_version=constants.API_VERSION, - kind=constants.XGBOOSTJOB_KIND, - metadata=V1ObjectMeta(name=JOB_NAME, namespace=job_namespace), - spec=KubeflowOrgV1XGBoostJobSpec( - run_policy=KubeflowOrgV1RunPolicy( - clean_pod_policy="None", - scheduling_policy=scheduling_policy, - ), - xgb_replica_specs={"Master": master, "Worker": worker}, - ), - ) - - -def generate_container() -> V1Container: - return V1Container( - name=CONTAINER_NAME, - image="docker.io/kubeflow/xgboost-dist-iris:latest", - args=[ - "--job_type=Train", - "--xgboost_parameter=objective:multi:softprob,num_class:3", - "--n_estimators=10", - "--learning_rate=0.1", - "--model_path=/tmp/xgboost-model", - "--model_storage_type=local", - ], - resources=V1ResourceRequirements(limits={"memory": "2Gi", "cpu": "0.8"}), - ) diff --git a/sdk/python/test/e2e/utils.py b/sdk/python/test/e2e/utils.py deleted file mode 100644 index 7a6f81f922..0000000000 --- a/sdk/python/test/e2e/utils.py +++ /dev/null @@ -1,88 +0,0 @@ -import logging -import time - -from kubeflow.training import TrainingClient -from kubeflow.training.constants import constants -from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_SCHEDULER_PLUGINS -from test.e2e.constants import DEFAULT_SCHEDULER_PLUGINS_NAME -from test.e2e.constants import TEST_GANG_SCHEDULER_NAME_VOLCANO - -logging.basicConfig(format="%(message)s") -logging.getLogger().setLevel(logging.INFO) - - -def verify_unschedulable_job_e2e(client: TrainingClient, name: str, namespace: str): - """Verify unschedulable Training Job e2e test.""" - logging.info(f"\n\n\n{client.job_kind} is creating") - 
job = client.wait_for_job_conditions( - name, namespace, expected_conditions={constants.JOB_CONDITION_CREATED} - ) - - logging.info("Checking 3 times that pods are not scheduled") - for num in range(3): - logging.info(f"Number of attempts: {int(num)+1}/3") - - # Job should have correct conditions - if not client.is_job_created(job=job) or client.is_job_running(job=job): - raise Exception( - f"{client.job_kind} should be in Created condition. " - f"{client.job_kind} should not be in Running condition." - ) - - logging.info("Sleeping 5 seconds...") - time.sleep(5) - - -def verify_job_e2e( - client: TrainingClient, - name: str, - namespace: str, - wait_timeout: int = 600, -): - """Verify Training Job e2e test.""" - - # Wait until Job is Succeeded. - logging.info(f"\n\n\n{client.job_kind} is running") - job = client.wait_for_job_conditions(name, namespace, wait_timeout=wait_timeout) - - # Job should have Created, Running, and Succeeded conditions. - conditions = client.get_job_conditions(job=job) - # If Job is complete fast, it has 2 conditions: Created and Succeeded. - if len(conditions) < 2: - raise Exception(f"{client.job_kind} conditions are invalid: {conditions}") - - # Job should have correct conditions. - if ( - not client.is_job_created(job=job) - or not client.is_job_succeeded(job=job) - or client.is_job_running(job=job) - or client.is_job_restarting(job=job) - or client.is_job_failed(job=job) - ): - raise Exception( - f"{client.job_kind} should be in Succeeded and Created conditions. " - f"{client.job_kind} should not be in Running, Restarting, or Failed conditions." 
- ) - - -def get_pod_spec_scheduler_name(gang_scheduler_name: str) -> str: - if gang_scheduler_name == TEST_GANG_SCHEDULER_NAME_SCHEDULER_PLUGINS: - return DEFAULT_SCHEDULER_PLUGINS_NAME - elif gang_scheduler_name == TEST_GANG_SCHEDULER_NAME_VOLCANO: - return TEST_GANG_SCHEDULER_NAME_VOLCANO - - return "" - - -def print_job_results(client: TrainingClient, name: str, namespace: str): - # Print Job. - logging.info(f"\n\n\n{client.job_kind} info") - logging.info(client.get_job(name, namespace)) - - # Print Job pod names. - logging.info(f"\n\n\n{client.job_kind} pod names") - logging.info(client.get_job_pod_names(name, namespace)) - - # Print Job logs. - logging.info(f"\n\n\n{client.job_kind} logs") - client.get_job_logs(name, namespace) diff --git a/test/integration/framework/framework.go b/test/integration/framework/framework.go index 6a63b95d4f..e9612f9fdf 100644 --- a/test/integration/framework/framework.go +++ b/test/integration/framework/framework.go @@ -44,7 +44,7 @@ import ( kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" controllerv2 "github.com/kubeflow/training-operator/pkg/controller.v2" runtimecore "github.com/kubeflow/training-operator/pkg/runtime.v2/core" - webhookv2 "github.com/kubeflow/training-operator/pkg/webhook.v2" + webhooksv2 "github.com/kubeflow/training-operator/pkg/webhooks.v2" ) type Framework struct { @@ -114,7 +114,7 @@ func (f *Framework) RunManager(cfg *rest.Config) (context.Context, client.Client gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred(), "controller", failedCtrlName) gomega.ExpectWithOffset(1, failedCtrlName).To(gomega.BeEmpty()) - failedWebhookName, err := webhookv2.Setup(mgr, runtimes) + failedWebhookName, err := webhooksv2.Setup(mgr, runtimes) gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred(), "webhook", failedWebhookName) gomega.ExpectWithOffset(1, failedWebhookName).To(gomega.BeEmpty()) diff --git a/test/integration/webhook.v2/clustertrainingruntime_test.go 
b/test/integration/webhook.v2/clustertrainingruntime_test.go index 831937ed3f..d419193f7a 100644 --- a/test/integration/webhook.v2/clustertrainingruntime_test.go +++ b/test/integration/webhook.v2/clustertrainingruntime_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package webhookv2 +package webhooksv2 import ( "github.com/onsi/ginkgo/v2" diff --git a/test/integration/webhook.v2/suite_test.go b/test/integration/webhook.v2/suite_test.go index addf4e5d65..7229557ccb 100644 --- a/test/integration/webhook.v2/suite_test.go +++ b/test/integration/webhook.v2/suite_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package webhookv2 +package webhooksv2 import ( "context" diff --git a/test/integration/webhook.v2/trainingruntime_test.go b/test/integration/webhook.v2/trainingruntime_test.go index 0de7cd2250..e627d6a52f 100644 --- a/test/integration/webhook.v2/trainingruntime_test.go +++ b/test/integration/webhook.v2/trainingruntime_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package webhookv2 +package webhooksv2 import ( "github.com/onsi/ginkgo/v2" diff --git a/test/integration/webhook.v2/trainjob_test.go b/test/integration/webhook.v2/trainjob_test.go index a8578f007b..98b7c984a4 100644 --- a/test/integration/webhook.v2/trainjob_test.go +++ b/test/integration/webhook.v2/trainjob_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package webhookv2 +package webhooksv2 import ( "github.com/onsi/ginkgo/v2" diff --git a/test_job/README.md b/test_job/README.md deleted file mode 100644 index e92ab334d2..0000000000 --- a/test_job/README.md +++ /dev/null @@ -1,29 +0,0 @@ -## Test Job Controller - -This is a Test Job Controller example. 
As you can see, we have job crd definition under `apis/test_job/v1`. -[code-generator](https://github.com/kubernetes/code-generator) generate deepcopy, clientset and other libraries. - -`controler.v1/test_job/test_job_controller` defines a struct `TestJobController` which implements [commonv1.ControllerInterface](../pkg/apis/common/v1/interface.go) - -```yaml -├── README.md -├── apis -│   └── test_job -│   └── v1 -│   ├── constants.go -│   ├── defaults.go -│   ├── doc.go -│   ├── openapi_generated.go -│   ├── register.go -│   ├── types.go -│   ├── zz_generated.deepcopy.go -│   └── zz_generated.defaults.go -├── client -│   ├── clientset -│   ├── informers -│   └── listers -├── controller.v1 -│   └── test_job -│   └── test_job_controller.go -└── test_util -``` diff --git a/test_job/apis/test_job/v1/constants.go b/test_job/apis/test_job/v1/constants.go deleted file mode 100644 index a793c69367..0000000000 --- a/test_job/apis/test_job/v1/constants.go +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -const ( - // EnvKubeflowNamespace is ENV for kubeflow namespace specified by user. - EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE" - - // DefaultPortName is name of the port used to communicate between workers. 
- DefaultPortName = "job-port" - // DefaultContainerName is the name of the TestJob container. - DefaultContainerName = "test-container" - // DefaultPort is default value of the port. - DefaultPort = 2222 - // DefaultRestartPolicy is default RestartPolicy for TFReplicaSpec. - DefaultRestartPolicy = kubeflowv1.RestartPolicyNever -) diff --git a/test_job/apis/test_job/v1/defaults.go b/test_job/apis/test_job/v1/defaults.go deleted file mode 100644 index a3ce414321..0000000000 --- a/test_job/apis/test_job/v1/defaults.go +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - "strings" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" - - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" -) - -// Int32 is a helper routine that allocates a new int32 value -// to store v and returns a pointer to it. -func Int32(v int32) *int32 { - return &v -} - -func addDefaultingFuncs(scheme *runtime.Scheme) error { - return RegisterDefaults(scheme) -} - -// setDefaultPort sets the default ports for container. 
-func setDefaultPort(spec *v1.PodSpec) { - index := 0 - for i, container := range spec.Containers { - if container.Name == DefaultContainerName { - index = i - break - } - } - - hasJobPort := false - for _, port := range spec.Containers[index].Ports { - if port.Name == DefaultPortName { - hasJobPort = true - break - } - } - if !hasJobPort { - spec.Containers[index].Ports = append(spec.Containers[index].Ports, v1.ContainerPort{ - Name: DefaultPortName, - ContainerPort: DefaultPort, - }) - } -} - -func setDefaultReplicas(spec *kubeflowv1.ReplicaSpec) { - if spec.Replicas == nil { - spec.Replicas = Int32(1) - } - if spec.RestartPolicy == "" { - spec.RestartPolicy = DefaultRestartPolicy - } -} - -// setTypeNamesToCamelCase sets the name of all replica types from any case to correct case. -func setTypeNamesToCamelCase(testJob *TestJob) { - setTypeNameToCamelCase(testJob, TestReplicaTypeWorker) - setTypeNameToCamelCase(testJob, TestReplicaTypeMaster) -} - -// setTypeNameToCamelCase sets the name of the replica type from any case to correct case. -// E.g. from ps to PS; from WORKER to Worker. -func setTypeNameToCamelCase(testJob *TestJob, typ TestReplicaType) { - for t := range testJob.Spec.TestReplicaSpecs { - if strings.EqualFold(string(t), string(typ)) && t != typ { - spec := testJob.Spec.TestReplicaSpecs[t] - delete(testJob.Spec.TestReplicaSpecs, t) - testJob.Spec.TestReplicaSpecs[typ] = spec - return - } - } -} - -// SetDefaults_TestJob sets any unspecified values to defaults. -func SetDefaults_TestJob(testjob *TestJob) { - // Set default RunPolicy - if testjob.Spec.RunPolicy == nil { - testjob.Spec.RunPolicy = &kubeflowv1.RunPolicy{ - CleanPodPolicy: nil, - TTLSecondsAfterFinished: nil, - ActiveDeadlineSeconds: nil, - BackoffLimit: nil, - SchedulingPolicy: nil, - } - } - - // Set default cleanpod policy to Running. 
- if testjob.Spec.RunPolicy.CleanPodPolicy == nil { - testjob.Spec.RunPolicy.CleanPodPolicy = kubeflowv1.CleanPodPolicyPointer(kubeflowv1.CleanPodPolicyRunning) - } - - // Update the key of TestReplicaSpecs to camel case. - setTypeNamesToCamelCase(testjob) - - for _, spec := range testjob.Spec.TestReplicaSpecs { - // Set default replicas to 1. - setDefaultReplicas(spec) - // Set default port to the container. - setDefaultPort(&spec.Template.Spec) - } -} diff --git a/test_job/apis/test_job/v1/doc.go b/test_job/apis/test_job/v1/doc.go deleted file mode 100644 index 9bee493bb1..0000000000 --- a/test_job/apis/test_job/v1/doc.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +k8s:deepcopy-gen=package,register -// +k8s:defaulter-gen=TypeMeta -// +k8s:openapi-gen=true - -// Package v1 is the v1 version of the API. -// +groupName=kubeflow.org -package v1 diff --git a/test_job/apis/test_job/v1/openapi_generated.go b/test_job/apis/test_job/v1/openapi_generated.go deleted file mode 100644 index a8d1aacd7e..0000000000 --- a/test_job/apis/test_job/v1/openapi_generated.go +++ /dev/null @@ -1,170 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by openapi-gen. DO NOT EDIT. - -// This file was autogenerated by openapi-gen. Do not edit it manually! - -package v1 - -import ( - common "k8s.io/kube-openapi/pkg/common" - spec "k8s.io/kube-openapi/pkg/validation/spec" -) - -func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition { - return map[string]common.OpenAPIDefinition{ - "github.com/kubeflow/training-operator/test_job/apis/test_job/v1.TestJob": schema_test_job_apis_test_job_v1_TestJob(ref), - "github.com/kubeflow/training-operator/test_job/apis/test_job/v1.TestJobList": schema_test_job_apis_test_job_v1_TestJobList(ref), - "github.com/kubeflow/training-operator/test_job/apis/test_job/v1.TestJobSpec": schema_test_job_apis_test_job_v1_TestJobSpec(ref), - } -} - -func schema_test_job_apis_test_job_v1_TestJob(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "A generic job used for unit tests.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Description: "Standard object's metadata.", - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"), - }, - }, - "spec": { - SchemaProps: spec.SchemaProps{ - Description: "Specification of the desired behavior of the TestJob.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/test_job/apis/test_job/v1.TestJobSpec"), - }, - }, - "status": { - SchemaProps: spec.SchemaProps{ - Description: "Most recently observed status of the TestJob. This data may not be up to date. Populated by the system. 
Read-only.", - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus"), - }, - }, - }, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.JobStatus", "github.com/kubeflow/training-operator/test_job/apis/test_job/v1.TestJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, - } -} - -func schema_test_job_apis_test_job_v1_TestJobList(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "TestJobList is a list of TestJobs.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "kind": { - SchemaProps: spec.SchemaProps{ - Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", - Type: []string{"string"}, - Format: "", - }, - }, - "apiVersion": { - SchemaProps: spec.SchemaProps{ - Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", - Type: []string{"string"}, - Format: "", - }, - }, - "metadata": { - SchemaProps: spec.SchemaProps{ - Description: "Standard list metadata.", - Default: map[string]interface{}{}, - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"), - }, - }, - "items": { - SchemaProps: spec.SchemaProps{ - Description: "List of TestJobs.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/kubeflow/training-operator/test_job/apis/test_job/v1.TestJob"), - }, - }, - }, - }, - }, - }, - Required: []string{"items"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/test_job/apis/test_job/v1.TestJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, - } -} - -func schema_test_job_apis_test_job_v1_TestJobSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "TestJobSpec is a desired state description of the TestJob.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "runPolicy": { - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"), - }, - }, - "testReplicaSpecs": { - SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec"), - }, - }, - }, - }, - }, - }, - Required: []string{"testReplicaSpecs"}, - }, - }, - Dependencies: []string{ - "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.ReplicaSpec", "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1.RunPolicy"}, - } -} diff --git 
a/test_job/apis/test_job/v1/register.go b/test_job/apis/test_job/v1/register.go deleted file mode 100644 index 8b893be5fb..0000000000 --- a/test_job/apis/test_job/v1/register.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" -) - -var ( - // TODO: move SchemeBuilder with zz_generated.deepcopy.go to k8s.io/api. - // localSchemeBuilder and AddToScheme will stay in k8s.io/kubernetes. - SchemeBuilder runtime.SchemeBuilder - localSchemeBuilder = &SchemeBuilder - AddToScheme = localSchemeBuilder.AddToScheme -) - -const ( - // GroupName is the group name use in this package. - GroupName = "kubeflow.org" - // Kind is the kind name. - Kind = "TestJob" - // GroupVersion is the version. - GroupVersion = "v1" - // Plural is the Plural for TestJob. - Plural = "testjobs" - // Singular is the singular for TestJob. - Singular = "testjob" - // TESTCRD is the CRD name for TestJob. - TESTCRD = "testjobs.kubeflow.org" -) - -var ( - // SchemeGroupVersion is the group version used to register these objects. - SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: GroupVersion} - // SchemeGroupVersionKind is the GroupVersionKind of the resource. 
- SchemeGroupVersionKind = SchemeGroupVersion.WithKind(Kind) -) - -func init() { - // We only register manually written functions here. The registration of the - // generated functions takes place in the generated files. The separation - // makes the code compile even when the generated files are missing. - localSchemeBuilder.Register(addKnownTypes) - localSchemeBuilder.Register(addDefaultingFuncs) -} - -// Resource takes an unqualified resource and returns a Group-qualified GroupResource. -func Resource(resource string) schema.GroupResource { - return SchemeGroupVersion.WithResource(resource).GroupResource() -} - -// addKnownTypes adds the set of types defined in this package to the supplied scheme. -func addKnownTypes(scheme *runtime.Scheme) error { - scheme.AddKnownTypes(SchemeGroupVersion, - &TestJob{}, - &TestJobList{}, - ) - metav1.AddToGroupVersion(scheme, SchemeGroupVersion) - return nil -} diff --git a/test_job/apis/test_job/v1/types.go b/test_job/apis/test_job/v1/types.go deleted file mode 100644 index d60ca4dc1c..0000000000 --- a/test_job/apis/test_job/v1/types.go +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2019 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package v1 - -import ( - kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// +genclient -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=testjob - -// A generic job used for unit tests. -type TestJob struct { - metav1.TypeMeta `json:",inline"` - - // Standard object's metadata. - metav1.ObjectMeta `json:"metadata,omitempty"` - - // Specification of the desired behavior of the TestJob. - Spec TestJobSpec `json:"spec,omitempty"` - - // Most recently observed status of the TestJob. - // This data may not be up to date. - // Populated by the system. - // Read-only. - Status kubeflowv1.JobStatus `json:"status,omitempty"` -} - -// TestJobSpec is a desired state description of the TestJob. -type TestJobSpec struct { - RunPolicy *kubeflowv1.RunPolicy `json:"runPolicy,omitempty"` - TestReplicaSpecs map[TestReplicaType]*kubeflowv1.ReplicaSpec `json:"testReplicaSpecs"` -} - -// TestReplicaType is the type for TestReplica. -type TestReplicaType kubeflowv1.ReplicaType - -const ( - TestReplicaTypeWorker TestReplicaType = "Worker" - TestReplicaTypeMaster TestReplicaType = "Master" -) - -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +resource:path=testjobs - -// TestJobList is a list of TestJobs. -type TestJobList struct { - metav1.TypeMeta `json:",inline"` - - // Standard list metadata. - metav1.ListMeta `json:"metadata,omitempty"` - - // List of TestJobs. 
- Items []TestJob `json:"items"` -} diff --git a/test_job/apis/test_job/v1/zz_generated.deepcopy.go b/test_job/apis/test_job/v1/zz_generated.deepcopy.go deleted file mode 100644 index 7b52fb96e3..0000000000 --- a/test_job/apis/test_job/v1/zz_generated.deepcopy.go +++ /dev/null @@ -1,122 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by deepcopy-gen. DO NOT EDIT. - -package v1 - -import ( - commonv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - runtime "k8s.io/apimachinery/pkg/runtime" -) - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *TestJob) DeepCopyInto(out *TestJob) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TestJob. -func (in *TestJob) DeepCopy() *TestJob { - if in == nil { - return nil - } - out := new(TestJob) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
-func (in *TestJob) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *TestJobList) DeepCopyInto(out *TestJobList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]TestJob, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TestJobList. -func (in *TestJobList) DeepCopy() *TestJobList { - if in == nil { - return nil - } - out := new(TestJobList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *TestJobList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *TestJobSpec) DeepCopyInto(out *TestJobSpec) { - *out = *in - if in.RunPolicy != nil { - in, out := &in.RunPolicy, &out.RunPolicy - *out = new(commonv1.RunPolicy) - (*in).DeepCopyInto(*out) - } - if in.TestReplicaSpecs != nil { - in, out := &in.TestReplicaSpecs, &out.TestReplicaSpecs - *out = make(map[TestReplicaType]*commonv1.ReplicaSpec, len(*in)) - for key, val := range *in { - var outVal *commonv1.ReplicaSpec - if val == nil { - (*out)[key] = nil - } else { - in, out := &val, &outVal - *out = new(commonv1.ReplicaSpec) - (*in).DeepCopyInto(*out) - } - (*out)[key] = outVal - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TestJobSpec. 
-func (in *TestJobSpec) DeepCopy() *TestJobSpec { - if in == nil { - return nil - } - out := new(TestJobSpec) - in.DeepCopyInto(out) - return out -} diff --git a/test_job/apis/test_job/v1/zz_generated.defaults.go b/test_job/apis/test_job/v1/zz_generated.defaults.go deleted file mode 100644 index 9f7adba2ca..0000000000 --- a/test_job/apis/test_job/v1/zz_generated.defaults.go +++ /dev/null @@ -1,44 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by defaulter-gen. DO NOT EDIT. - -package v1 - -import ( - runtime "k8s.io/apimachinery/pkg/runtime" -) - -// RegisterDefaults adds defaulters functions to the given scheme. -// Public to allow building arbitrary schemes. -// All generated defaulters are covering - they call all nested defaulters. 
-func RegisterDefaults(scheme *runtime.Scheme) error { - scheme.AddTypeDefaultingFunc(&TestJob{}, func(obj interface{}) { SetObjectDefaults_TestJob(obj.(*TestJob)) }) - scheme.AddTypeDefaultingFunc(&TestJobList{}, func(obj interface{}) { SetObjectDefaults_TestJobList(obj.(*TestJobList)) }) - return nil -} - -func SetObjectDefaults_TestJob(in *TestJob) { - SetDefaults_TestJob(in) -} - -func SetObjectDefaults_TestJobList(in *TestJobList) { - for i := range in.Items { - a := &in.Items[i] - SetObjectDefaults_TestJob(a) - } -} diff --git a/test_job/client/clientset/versioned/clientset.go b/test_job/client/clientset/versioned/clientset.go deleted file mode 100644 index 262fb68507..0000000000 --- a/test_job/client/clientset/versioned/clientset.go +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package versioned - -import ( - "fmt" - "net/http" - - kubeflowv1 "github.com/kubeflow/training-operator/test_job/client/clientset/versioned/typed/test_job/v1" - discovery "k8s.io/client-go/discovery" - rest "k8s.io/client-go/rest" - flowcontrol "k8s.io/client-go/util/flowcontrol" -) - -type Interface interface { - Discovery() discovery.DiscoveryInterface - KubeflowV1() kubeflowv1.KubeflowV1Interface -} - -// Clientset contains the clients for groups. Each group has exactly one -// version included in a Clientset. 
-type Clientset struct { - *discovery.DiscoveryClient - kubeflowV1 *kubeflowv1.KubeflowV1Client -} - -// KubeflowV1 retrieves the KubeflowV1Client -func (c *Clientset) KubeflowV1() kubeflowv1.KubeflowV1Interface { - return c.kubeflowV1 -} - -// Discovery retrieves the DiscoveryClient -func (c *Clientset) Discovery() discovery.DiscoveryInterface { - if c == nil { - return nil - } - return c.DiscoveryClient -} - -// NewForConfig creates a new Clientset for the given config. -// If config's RateLimiter is not set and QPS and Burst are acceptable, -// NewForConfig will generate a rate-limiter in configShallowCopy. -// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient), -// where httpClient was generated with rest.HTTPClientFor(c). -func NewForConfig(c *rest.Config) (*Clientset, error) { - configShallowCopy := *c - - if configShallowCopy.UserAgent == "" { - configShallowCopy.UserAgent = rest.DefaultKubernetesUserAgent() - } - - // share the transport between all clients - httpClient, err := rest.HTTPClientFor(&configShallowCopy) - if err != nil { - return nil, err - } - - return NewForConfigAndClient(&configShallowCopy, httpClient) -} - -// NewForConfigAndClient creates a new Clientset for the given config and http client. -// Note the http client provided takes precedence over the configured transport values. -// If config's RateLimiter is not set and QPS and Burst are acceptable, -// NewForConfigAndClient will generate a rate-limiter in configShallowCopy. 
-func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset, error) { - configShallowCopy := *c - if configShallowCopy.RateLimiter == nil && configShallowCopy.QPS > 0 { - if configShallowCopy.Burst <= 0 { - return nil, fmt.Errorf("burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0") - } - configShallowCopy.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(configShallowCopy.QPS, configShallowCopy.Burst) - } - - var cs Clientset - var err error - cs.kubeflowV1, err = kubeflowv1.NewForConfigAndClient(&configShallowCopy, httpClient) - if err != nil { - return nil, err - } - - cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfigAndClient(&configShallowCopy, httpClient) - if err != nil { - return nil, err - } - return &cs, nil -} - -// NewForConfigOrDie creates a new Clientset for the given config and -// panics if there is an error in the config. -func NewForConfigOrDie(c *rest.Config) *Clientset { - cs, err := NewForConfig(c) - if err != nil { - panic(err) - } - return cs -} - -// New creates a new Clientset for the given RESTClient. -func New(c rest.Interface) *Clientset { - var cs Clientset - cs.kubeflowV1 = kubeflowv1.New(c) - - cs.DiscoveryClient = discovery.NewDiscoveryClient(c) - return &cs -} diff --git a/test_job/client/clientset/versioned/doc.go b/test_job/client/clientset/versioned/doc.go deleted file mode 100644 index 2e6a61adb5..0000000000 --- a/test_job/client/clientset/versioned/doc.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -// This package has the automatically generated clientset. -package versioned diff --git a/test_job/client/clientset/versioned/fake/clientset_generated.go b/test_job/client/clientset/versioned/fake/clientset_generated.go deleted file mode 100644 index 8e8e8d6494..0000000000 --- a/test_job/client/clientset/versioned/fake/clientset_generated.go +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. 
- -package fake - -import ( - clientset "github.com/kubeflow/training-operator/test_job/client/clientset/versioned" - kubeflowv1 "github.com/kubeflow/training-operator/test_job/client/clientset/versioned/typed/test_job/v1" - fakekubeflowv1 "github.com/kubeflow/training-operator/test_job/client/clientset/versioned/typed/test_job/v1/fake" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/watch" - "k8s.io/client-go/discovery" - fakediscovery "k8s.io/client-go/discovery/fake" - "k8s.io/client-go/testing" -) - -// NewSimpleClientset returns a clientset that will respond with the provided objects. -// It's backed by a very simple object tracker that processes creates, updates and deletions as-is, -// without applying any validations and/or defaults. It shouldn't be considered a replacement -// for a real clientset and is mostly useful in simple unit tests. -func NewSimpleClientset(objects ...runtime.Object) *Clientset { - o := testing.NewObjectTracker(scheme, codecs.UniversalDecoder()) - for _, obj := range objects { - if err := o.Add(obj); err != nil { - panic(err) - } - } - - cs := &Clientset{tracker: o} - cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake} - cs.AddReactor("*", "*", testing.ObjectReaction(o)) - cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) { - gvr := action.GetResource() - ns := action.GetNamespace() - watch, err := o.Watch(gvr, ns) - if err != nil { - return false, nil, err - } - return true, watch, nil - }) - - return cs -} - -// Clientset implements clientset.Interface. Meant to be embedded into a -// struct to get a default implementation. This makes faking out just the method -// you want to test easier. 
-type Clientset struct { - testing.Fake - discovery *fakediscovery.FakeDiscovery - tracker testing.ObjectTracker -} - -func (c *Clientset) Discovery() discovery.DiscoveryInterface { - return c.discovery -} - -func (c *Clientset) Tracker() testing.ObjectTracker { - return c.tracker -} - -var ( - _ clientset.Interface = &Clientset{} - _ testing.FakeClient = &Clientset{} -) - -// KubeflowV1 retrieves the KubeflowV1Client -func (c *Clientset) KubeflowV1() kubeflowv1.KubeflowV1Interface { - return &fakekubeflowv1.FakeKubeflowV1{Fake: &c.Fake} -} diff --git a/test_job/client/clientset/versioned/fake/doc.go b/test_job/client/clientset/versioned/fake/doc.go deleted file mode 100644 index aad8add991..0000000000 --- a/test_job/client/clientset/versioned/fake/doc.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -// This package has the automatically generated fake clientset. -package fake diff --git a/test_job/client/clientset/versioned/fake/register.go b/test_job/client/clientset/versioned/fake/register.go deleted file mode 100644 index e711ffe740..0000000000 --- a/test_job/client/clientset/versioned/fake/register.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package fake - -import ( - kubeflowv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - schema "k8s.io/apimachinery/pkg/runtime/schema" - serializer "k8s.io/apimachinery/pkg/runtime/serializer" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" -) - -var scheme = runtime.NewScheme() -var codecs = serializer.NewCodecFactory(scheme) - -var localSchemeBuilder = runtime.SchemeBuilder{ - kubeflowv1.AddToScheme, -} - -// AddToScheme adds all types of this clientset into the given scheme. This allows composition -// of clientsets, like in: -// -// import ( -// "k8s.io/client-go/kubernetes" -// clientsetscheme "k8s.io/client-go/kubernetes/scheme" -// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" -// ) -// -// kclientset, _ := kubernetes.NewForConfig(c) -// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) -// -// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types -// correctly. 
-var AddToScheme = localSchemeBuilder.AddToScheme - -func init() { - v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"}) - utilruntime.Must(AddToScheme(scheme)) -} diff --git a/test_job/client/clientset/versioned/scheme/doc.go b/test_job/client/clientset/versioned/scheme/doc.go deleted file mode 100644 index 64cf768531..0000000000 --- a/test_job/client/clientset/versioned/scheme/doc.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -// This package contains the scheme of the automatically generated clientset. -package scheme diff --git a/test_job/client/clientset/versioned/scheme/register.go b/test_job/client/clientset/versioned/scheme/register.go deleted file mode 100644 index ad318ca7e0..0000000000 --- a/test_job/client/clientset/versioned/scheme/register.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package scheme - -import ( - kubeflowv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - schema "k8s.io/apimachinery/pkg/runtime/schema" - serializer "k8s.io/apimachinery/pkg/runtime/serializer" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" -) - -var Scheme = runtime.NewScheme() -var Codecs = serializer.NewCodecFactory(Scheme) -var ParameterCodec = runtime.NewParameterCodec(Scheme) -var localSchemeBuilder = runtime.SchemeBuilder{ - kubeflowv1.AddToScheme, -} - -// AddToScheme adds all types of this clientset into the given scheme. This allows composition -// of clientsets, like in: -// -// import ( -// "k8s.io/client-go/kubernetes" -// clientsetscheme "k8s.io/client-go/kubernetes/scheme" -// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" -// ) -// -// kclientset, _ := kubernetes.NewForConfig(c) -// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) -// -// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types -// correctly. -var AddToScheme = localSchemeBuilder.AddToScheme - -func init() { - v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"}) - utilruntime.Must(AddToScheme(Scheme)) -} diff --git a/test_job/client/clientset/versioned/typed/test_job/v1/doc.go b/test_job/client/clientset/versioned/typed/test_job/v1/doc.go deleted file mode 100644 index 4f77d0b4f0..0000000000 --- a/test_job/client/clientset/versioned/typed/test_job/v1/doc.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -// This package has the automatically generated typed clients. -package v1 diff --git a/test_job/client/clientset/versioned/typed/test_job/v1/fake/doc.go b/test_job/client/clientset/versioned/typed/test_job/v1/fake/doc.go deleted file mode 100644 index c2e3e6984a..0000000000 --- a/test_job/client/clientset/versioned/typed/test_job/v1/fake/doc.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -// Package fake has the automatically generated clients. 
-package fake diff --git a/test_job/client/clientset/versioned/typed/test_job/v1/fake/fake_test_job_client.go b/test_job/client/clientset/versioned/typed/test_job/v1/fake/fake_test_job_client.go deleted file mode 100644 index 1e4e160e8a..0000000000 --- a/test_job/client/clientset/versioned/typed/test_job/v1/fake/fake_test_job_client.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package fake - -import ( - v1 "github.com/kubeflow/training-operator/test_job/client/clientset/versioned/typed/test_job/v1" - rest "k8s.io/client-go/rest" - testing "k8s.io/client-go/testing" -) - -type FakeKubeflowV1 struct { - *testing.Fake -} - -func (c *FakeKubeflowV1) TestJobs(namespace string) v1.TestJobInterface { - return &FakeTestJobs{c, namespace} -} - -// RESTClient returns a RESTClient that is used to communicate -// with API server by this client implementation. 
-func (c *FakeKubeflowV1) RESTClient() rest.Interface { - var ret *rest.RESTClient - return ret -} diff --git a/test_job/client/clientset/versioned/typed/test_job/v1/fake/fake_testjob.go b/test_job/client/clientset/versioned/typed/test_job/v1/fake/fake_testjob.go deleted file mode 100644 index c4a8576209..0000000000 --- a/test_job/client/clientset/versioned/typed/test_job/v1/fake/fake_testjob.go +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package fake - -import ( - "context" - - testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - schema "k8s.io/apimachinery/pkg/runtime/schema" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" -) - -// FakeTestJobs implements TestJobInterface -type FakeTestJobs struct { - Fake *FakeKubeflowV1 - ns string -} - -var testjobsResource = schema.GroupVersionResource{Group: "kubeflow.org", Version: "v1", Resource: "testjobs"} - -var testjobsKind = schema.GroupVersionKind{Group: "kubeflow.org", Version: "v1", Kind: "TestJob"} - -// Get takes name of the testJob, and returns the corresponding testJob object, and an error if there is any. 
-func (c *FakeTestJobs) Get(ctx context.Context, name string, options v1.GetOptions) (result *testjobv1.TestJob, err error) { - obj, err := c.Fake. - Invokes(testing.NewGetAction(testjobsResource, c.ns, name), &testjobv1.TestJob{}) - - if obj == nil { - return nil, err - } - return obj.(*testjobv1.TestJob), err -} - -// List takes label and field selectors, and returns the list of TestJobs that match those selectors. -func (c *FakeTestJobs) List(ctx context.Context, opts v1.ListOptions) (result *testjobv1.TestJobList, err error) { - obj, err := c.Fake. - Invokes(testing.NewListAction(testjobsResource, testjobsKind, c.ns, opts), &testjobv1.TestJobList{}) - - if obj == nil { - return nil, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &testjobv1.TestJobList{ListMeta: obj.(*testjobv1.TestJobList).ListMeta} - for _, item := range obj.(*testjobv1.TestJobList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested testJobs. -func (c *FakeTestJobs) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchAction(testjobsResource, c.ns, opts)) - -} - -// Create takes the representation of a testJob and creates it. Returns the server's representation of the testJob, and an error, if there is any. -func (c *FakeTestJobs) Create(ctx context.Context, testJob *testjobv1.TestJob, opts v1.CreateOptions) (result *testjobv1.TestJob, err error) { - obj, err := c.Fake. - Invokes(testing.NewCreateAction(testjobsResource, c.ns, testJob), &testjobv1.TestJob{}) - - if obj == nil { - return nil, err - } - return obj.(*testjobv1.TestJob), err -} - -// Update takes the representation of a testJob and updates it. Returns the server's representation of the testJob, and an error, if there is any. 
-func (c *FakeTestJobs) Update(ctx context.Context, testJob *testjobv1.TestJob, opts v1.UpdateOptions) (result *testjobv1.TestJob, err error) { - obj, err := c.Fake. - Invokes(testing.NewUpdateAction(testjobsResource, c.ns, testJob), &testjobv1.TestJob{}) - - if obj == nil { - return nil, err - } - return obj.(*testjobv1.TestJob), err -} - -// UpdateStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *FakeTestJobs) UpdateStatus(ctx context.Context, testJob *testjobv1.TestJob, opts v1.UpdateOptions) (*testjobv1.TestJob, error) { - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceAction(testjobsResource, "status", c.ns, testJob), &testjobv1.TestJob{}) - - if obj == nil { - return nil, err - } - return obj.(*testjobv1.TestJob), err -} - -// Delete takes name of the testJob and deletes it. Returns an error if one occurs. -func (c *FakeTestJobs) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { - _, err := c.Fake. - Invokes(testing.NewDeleteActionWithOptions(testjobsResource, c.ns, name, opts), &testjobv1.TestJob{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakeTestJobs) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { - action := testing.NewDeleteCollectionAction(testjobsResource, c.ns, listOpts) - - _, err := c.Fake.Invokes(action, &testjobv1.TestJobList{}) - return err -} - -// Patch applies the patch and returns the patched testJob. -func (c *FakeTestJobs) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *testjobv1.TestJob, err error) { - obj, err := c.Fake. 
- Invokes(testing.NewPatchSubresourceAction(testjobsResource, c.ns, name, pt, data, subresources...), &testjobv1.TestJob{}) - - if obj == nil { - return nil, err - } - return obj.(*testjobv1.TestJob), err -} diff --git a/test_job/client/clientset/versioned/typed/test_job/v1/generated_expansion.go b/test_job/client/clientset/versioned/typed/test_job/v1/generated_expansion.go deleted file mode 100644 index 8d1d2a105d..0000000000 --- a/test_job/client/clientset/versioned/typed/test_job/v1/generated_expansion.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -type TestJobExpansion interface{} diff --git a/test_job/client/clientset/versioned/typed/test_job/v1/test_job_client.go b/test_job/client/clientset/versioned/typed/test_job/v1/test_job_client.go deleted file mode 100644 index 3fffb6f05b..0000000000 --- a/test_job/client/clientset/versioned/typed/test_job/v1/test_job_client.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -import ( - "net/http" - - v1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" - "github.com/kubeflow/training-operator/test_job/client/clientset/versioned/scheme" - rest "k8s.io/client-go/rest" -) - -type KubeflowV1Interface interface { - RESTClient() rest.Interface - TestJobsGetter -} - -// KubeflowV1Client is used to interact with features provided by the kubeflow.org group. -type KubeflowV1Client struct { - restClient rest.Interface -} - -func (c *KubeflowV1Client) TestJobs(namespace string) TestJobInterface { - return newTestJobs(c, namespace) -} - -// NewForConfig creates a new KubeflowV1Client for the given config. -// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient), -// where httpClient was generated with rest.HTTPClientFor(c). -func NewForConfig(c *rest.Config) (*KubeflowV1Client, error) { - config := *c - if err := setConfigDefaults(&config); err != nil { - return nil, err - } - httpClient, err := rest.HTTPClientFor(&config) - if err != nil { - return nil, err - } - return NewForConfigAndClient(&config, httpClient) -} - -// NewForConfigAndClient creates a new KubeflowV1Client for the given config and http client. -// Note the http client provided takes precedence over the configured transport values. 
-func NewForConfigAndClient(c *rest.Config, h *http.Client) (*KubeflowV1Client, error) { - config := *c - if err := setConfigDefaults(&config); err != nil { - return nil, err - } - client, err := rest.RESTClientForConfigAndClient(&config, h) - if err != nil { - return nil, err - } - return &KubeflowV1Client{client}, nil -} - -// NewForConfigOrDie creates a new KubeflowV1Client for the given config and -// panics if there is an error in the config. -func NewForConfigOrDie(c *rest.Config) *KubeflowV1Client { - client, err := NewForConfig(c) - if err != nil { - panic(err) - } - return client -} - -// New creates a new KubeflowV1Client for the given RESTClient. -func New(c rest.Interface) *KubeflowV1Client { - return &KubeflowV1Client{c} -} - -func setConfigDefaults(config *rest.Config) error { - gv := v1.SchemeGroupVersion - config.GroupVersion = &gv - config.APIPath = "/apis" - config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() - - if config.UserAgent == "" { - config.UserAgent = rest.DefaultKubernetesUserAgent() - } - - return nil -} - -// RESTClient returns a RESTClient that is used to communicate -// with API server by this client implementation. -func (c *KubeflowV1Client) RESTClient() rest.Interface { - if c == nil { - return nil - } - return c.restClient -} diff --git a/test_job/client/clientset/versioned/typed/test_job/v1/testjob.go b/test_job/client/clientset/versioned/typed/test_job/v1/testjob.go deleted file mode 100644 index 5ebaa9fe1d..0000000000 --- a/test_job/client/clientset/versioned/typed/test_job/v1/testjob.go +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by client-gen. DO NOT EDIT. - -package v1 - -import ( - "context" - "time" - - v1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" - scheme "github.com/kubeflow/training-operator/test_job/client/clientset/versioned/scheme" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - rest "k8s.io/client-go/rest" -) - -// TestJobsGetter has a method to return a TestJobInterface. -// A group's client should implement this interface. -type TestJobsGetter interface { - TestJobs(namespace string) TestJobInterface -} - -// TestJobInterface has methods to work with TestJob resources. 
-type TestJobInterface interface { - Create(ctx context.Context, testJob *v1.TestJob, opts metav1.CreateOptions) (*v1.TestJob, error) - Update(ctx context.Context, testJob *v1.TestJob, opts metav1.UpdateOptions) (*v1.TestJob, error) - UpdateStatus(ctx context.Context, testJob *v1.TestJob, opts metav1.UpdateOptions) (*v1.TestJob, error) - Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error - DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error - Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.TestJob, error) - List(ctx context.Context, opts metav1.ListOptions) (*v1.TestJobList, error) - Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.TestJob, err error) - TestJobExpansion -} - -// testJobs implements TestJobInterface -type testJobs struct { - client rest.Interface - ns string -} - -// newTestJobs returns a TestJobs -func newTestJobs(c *KubeflowV1Client, namespace string) *testJobs { - return &testJobs{ - client: c.RESTClient(), - ns: namespace, - } -} - -// Get takes name of the testJob, and returns the corresponding testJob object, and an error if there is any. -func (c *testJobs) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.TestJob, err error) { - result = &v1.TestJob{} - err = c.client.Get(). - Namespace(c.ns). - Resource("testjobs"). - Name(name). - VersionedParams(&options, scheme.ParameterCodec). - Do(ctx). - Into(result) - return -} - -// List takes label and field selectors, and returns the list of TestJobs that match those selectors. 
-func (c *testJobs) List(ctx context.Context, opts metav1.ListOptions) (result *v1.TestJobList, err error) { - var timeout time.Duration - if opts.TimeoutSeconds != nil { - timeout = time.Duration(*opts.TimeoutSeconds) * time.Second - } - result = &v1.TestJobList{} - err = c.client.Get(). - Namespace(c.ns). - Resource("testjobs"). - VersionedParams(&opts, scheme.ParameterCodec). - Timeout(timeout). - Do(ctx). - Into(result) - return -} - -// Watch returns a watch.Interface that watches the requested testJobs. -func (c *testJobs) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { - var timeout time.Duration - if opts.TimeoutSeconds != nil { - timeout = time.Duration(*opts.TimeoutSeconds) * time.Second - } - opts.Watch = true - return c.client.Get(). - Namespace(c.ns). - Resource("testjobs"). - VersionedParams(&opts, scheme.ParameterCodec). - Timeout(timeout). - Watch(ctx) -} - -// Create takes the representation of a testJob and creates it. Returns the server's representation of the testJob, and an error, if there is any. -func (c *testJobs) Create(ctx context.Context, testJob *v1.TestJob, opts metav1.CreateOptions) (result *v1.TestJob, err error) { - result = &v1.TestJob{} - err = c.client.Post(). - Namespace(c.ns). - Resource("testjobs"). - VersionedParams(&opts, scheme.ParameterCodec). - Body(testJob). - Do(ctx). - Into(result) - return -} - -// Update takes the representation of a testJob and updates it. Returns the server's representation of the testJob, and an error, if there is any. -func (c *testJobs) Update(ctx context.Context, testJob *v1.TestJob, opts metav1.UpdateOptions) (result *v1.TestJob, err error) { - result = &v1.TestJob{} - err = c.client.Put(). - Namespace(c.ns). - Resource("testjobs"). - Name(testJob.Name). - VersionedParams(&opts, scheme.ParameterCodec). - Body(testJob). - Do(ctx). - Into(result) - return -} - -// UpdateStatus was generated because the type contains a Status member. 
-// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *testJobs) UpdateStatus(ctx context.Context, testJob *v1.TestJob, opts metav1.UpdateOptions) (result *v1.TestJob, err error) { - result = &v1.TestJob{} - err = c.client.Put(). - Namespace(c.ns). - Resource("testjobs"). - Name(testJob.Name). - SubResource("status"). - VersionedParams(&opts, scheme.ParameterCodec). - Body(testJob). - Do(ctx). - Into(result) - return -} - -// Delete takes name of the testJob and deletes it. Returns an error if one occurs. -func (c *testJobs) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { - return c.client.Delete(). - Namespace(c.ns). - Resource("testjobs"). - Name(name). - Body(&opts). - Do(ctx). - Error() -} - -// DeleteCollection deletes a collection of objects. -func (c *testJobs) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { - var timeout time.Duration - if listOpts.TimeoutSeconds != nil { - timeout = time.Duration(*listOpts.TimeoutSeconds) * time.Second - } - return c.client.Delete(). - Namespace(c.ns). - Resource("testjobs"). - VersionedParams(&listOpts, scheme.ParameterCodec). - Timeout(timeout). - Body(&opts). - Do(ctx). - Error() -} - -// Patch applies the patch and returns the patched testJob. -func (c *testJobs) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.TestJob, err error) { - result = &v1.TestJob{} - err = c.client.Patch(pt). - Namespace(c.ns). - Resource("testjobs"). - Name(name). - SubResource(subresources...). - VersionedParams(&opts, scheme.ParameterCodec). - Body(data). - Do(ctx). 
- Into(result) - return -} diff --git a/test_job/client/informers/externalversions/factory.go b/test_job/client/informers/externalversions/factory.go deleted file mode 100644 index 3c0fa6ae0a..0000000000 --- a/test_job/client/informers/externalversions/factory.go +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. - -package externalversions - -import ( - reflect "reflect" - sync "sync" - time "time" - - versioned "github.com/kubeflow/training-operator/test_job/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/training-operator/test_job/client/informers/externalversions/internalinterfaces" - testjob "github.com/kubeflow/training-operator/test_job/client/informers/externalversions/test_job" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - schema "k8s.io/apimachinery/pkg/runtime/schema" - cache "k8s.io/client-go/tools/cache" -) - -// SharedInformerOption defines the functional option type for SharedInformerFactory. 
-type SharedInformerOption func(*sharedInformerFactory) *sharedInformerFactory - -type sharedInformerFactory struct { - client versioned.Interface - namespace string - tweakListOptions internalinterfaces.TweakListOptionsFunc - lock sync.Mutex - defaultResync time.Duration - customResync map[reflect.Type]time.Duration - - informers map[reflect.Type]cache.SharedIndexInformer - // startedInformers is used for tracking which informers have been started. - // This allows Start() to be called multiple times safely. - startedInformers map[reflect.Type]bool -} - -// WithCustomResyncConfig sets a custom resync period for the specified informer types. -func WithCustomResyncConfig(resyncConfig map[v1.Object]time.Duration) SharedInformerOption { - return func(factory *sharedInformerFactory) *sharedInformerFactory { - for k, v := range resyncConfig { - factory.customResync[reflect.TypeOf(k)] = v - } - return factory - } -} - -// WithTweakListOptions sets a custom filter on all listers of the configured SharedInformerFactory. -func WithTweakListOptions(tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerOption { - return func(factory *sharedInformerFactory) *sharedInformerFactory { - factory.tweakListOptions = tweakListOptions - return factory - } -} - -// WithNamespace limits the SharedInformerFactory to the specified namespace. -func WithNamespace(namespace string) SharedInformerOption { - return func(factory *sharedInformerFactory) *sharedInformerFactory { - factory.namespace = namespace - return factory - } -} - -// NewSharedInformerFactory constructs a new instance of sharedInformerFactory for all namespaces. -func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory { - return NewSharedInformerFactoryWithOptions(client, defaultResync) -} - -// NewFilteredSharedInformerFactory constructs a new instance of sharedInformerFactory. 
-// Listers obtained via this SharedInformerFactory will be subject to the same filters -// as specified here. -// Deprecated: Please use NewSharedInformerFactoryWithOptions instead -func NewFilteredSharedInformerFactory(client versioned.Interface, defaultResync time.Duration, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerFactory { - return NewSharedInformerFactoryWithOptions(client, defaultResync, WithNamespace(namespace), WithTweakListOptions(tweakListOptions)) -} - -// NewSharedInformerFactoryWithOptions constructs a new instance of a SharedInformerFactory with additional options. -func NewSharedInformerFactoryWithOptions(client versioned.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory { - factory := &sharedInformerFactory{ - client: client, - namespace: v1.NamespaceAll, - defaultResync: defaultResync, - informers: make(map[reflect.Type]cache.SharedIndexInformer), - startedInformers: make(map[reflect.Type]bool), - customResync: make(map[reflect.Type]time.Duration), - } - - // Apply all options - for _, opt := range options { - factory = opt(factory) - } - - return factory -} - -// Start initializes all requested informers. -func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) { - f.lock.Lock() - defer f.lock.Unlock() - - for informerType, informer := range f.informers { - if !f.startedInformers[informerType] { - go informer.Run(stopCh) - f.startedInformers[informerType] = true - } - } -} - -// WaitForCacheSync waits for all started informers' cache were synced. 
-func (f *sharedInformerFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool { - informers := func() map[reflect.Type]cache.SharedIndexInformer { - f.lock.Lock() - defer f.lock.Unlock() - - informers := map[reflect.Type]cache.SharedIndexInformer{} - for informerType, informer := range f.informers { - if f.startedInformers[informerType] { - informers[informerType] = informer - } - } - return informers - }() - - res := map[reflect.Type]bool{} - for informType, informer := range informers { - res[informType] = cache.WaitForCacheSync(stopCh, informer.HasSynced) - } - return res -} - -// InternalInformerFor returns the SharedIndexInformer for obj using an internal -// client. -func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer { - f.lock.Lock() - defer f.lock.Unlock() - - informerType := reflect.TypeOf(obj) - informer, exists := f.informers[informerType] - if exists { - return informer - } - - resyncPeriod, exists := f.customResync[informerType] - if !exists { - resyncPeriod = f.defaultResync - } - - informer = newFunc(f.client, resyncPeriod) - f.informers[informerType] = informer - - return informer -} - -// SharedInformerFactory provides shared informers for resources in all known -// API group versions. 
-type SharedInformerFactory interface { - internalinterfaces.SharedInformerFactory - ForResource(resource schema.GroupVersionResource) (GenericInformer, error) - WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool - - Kubeflow() testjob.Interface -} - -func (f *sharedInformerFactory) Kubeflow() testjob.Interface { - return testjob.New(f, f.namespace, f.tweakListOptions) -} diff --git a/test_job/client/informers/externalversions/generic.go b/test_job/client/informers/externalversions/generic.go deleted file mode 100644 index de0589d2c8..0000000000 --- a/test_job/client/informers/externalversions/generic.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. - -package externalversions - -import ( - "fmt" - - v1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" - schema "k8s.io/apimachinery/pkg/runtime/schema" - cache "k8s.io/client-go/tools/cache" -) - -// GenericInformer is type of SharedIndexInformer which will locate and delegate to other -// sharedInformers based on type -type GenericInformer interface { - Informer() cache.SharedIndexInformer - Lister() cache.GenericLister -} - -type genericInformer struct { - informer cache.SharedIndexInformer - resource schema.GroupResource -} - -// Informer returns the SharedIndexInformer. 
-func (f *genericInformer) Informer() cache.SharedIndexInformer { - return f.informer -} - -// Lister returns the GenericLister. -func (f *genericInformer) Lister() cache.GenericLister { - return cache.NewGenericLister(f.Informer().GetIndexer(), f.resource) -} - -// ForResource gives generic access to a shared informer of the matching type -// TODO extend this to unknown resources with a client pool -func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) { - switch resource { - // Group=kubeflow.org, Version=v1 - case v1.SchemeGroupVersion.WithResource("testjobs"): - return &genericInformer{resource: resource.GroupResource(), informer: f.Kubeflow().V1().TestJobs().Informer()}, nil - - } - - return nil, fmt.Errorf("no informer found for %v", resource) -} diff --git a/test_job/client/informers/externalversions/internalinterfaces/factory_interfaces.go b/test_job/client/informers/externalversions/internalinterfaces/factory_interfaces.go deleted file mode 100644 index 4039e7214d..0000000000 --- a/test_job/client/informers/externalversions/internalinterfaces/factory_interfaces.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. 
- -package internalinterfaces - -import ( - time "time" - - versioned "github.com/kubeflow/training-operator/test_job/client/clientset/versioned" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - cache "k8s.io/client-go/tools/cache" -) - -// NewInformerFunc takes versioned.Interface and time.Duration to return a SharedIndexInformer. -type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer - -// SharedInformerFactory a small interface to allow for adding an informer without an import cycle -type SharedInformerFactory interface { - Start(stopCh <-chan struct{}) - InformerFor(obj runtime.Object, newFunc NewInformerFunc) cache.SharedIndexInformer -} - -// TweakListOptionsFunc is a function that transforms a v1.ListOptions. -type TweakListOptionsFunc func(*v1.ListOptions) diff --git a/test_job/client/informers/externalversions/test_job/interface.go b/test_job/client/informers/externalversions/test_job/interface.go deleted file mode 100644 index 45a4cb350c..0000000000 --- a/test_job/client/informers/externalversions/test_job/interface.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. 
- -package test_job - -import ( - internalinterfaces "github.com/kubeflow/training-operator/test_job/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/training-operator/test_job/client/informers/externalversions/test_job/v1" -) - -// Interface provides access to each of this group's versions. -type Interface interface { - // V1 provides access to shared informers for resources in V1. - V1() v1.Interface -} - -type group struct { - factory internalinterfaces.SharedInformerFactory - namespace string - tweakListOptions internalinterfaces.TweakListOptionsFunc -} - -// New returns a new Interface. -func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { - return &group{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} -} - -// V1 returns a new v1.Interface. -func (g *group) V1() v1.Interface { - return v1.New(g.factory, g.namespace, g.tweakListOptions) -} diff --git a/test_job/client/informers/externalversions/test_job/v1/interface.go b/test_job/client/informers/externalversions/test_job/v1/interface.go deleted file mode 100644 index be06bcfeaf..0000000000 --- a/test_job/client/informers/externalversions/test_job/v1/interface.go +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. 
- -package v1 - -import ( - internalinterfaces "github.com/kubeflow/training-operator/test_job/client/informers/externalversions/internalinterfaces" -) - -// Interface provides access to all the informers in this group version. -type Interface interface { - // TestJobs returns a TestJobInformer. - TestJobs() TestJobInformer -} - -type version struct { - factory internalinterfaces.SharedInformerFactory - namespace string - tweakListOptions internalinterfaces.TweakListOptionsFunc -} - -// New returns a new Interface. -func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { - return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} -} - -// TestJobs returns a TestJobInformer. -func (v *version) TestJobs() TestJobInformer { - return &testJobInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} -} diff --git a/test_job/client/informers/externalversions/test_job/v1/testjob.go b/test_job/client/informers/externalversions/test_job/v1/testjob.go deleted file mode 100644 index a39ef4baa1..0000000000 --- a/test_job/client/informers/externalversions/test_job/v1/testjob.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by informer-gen. DO NOT EDIT. 
- -package v1 - -import ( - "context" - time "time" - - testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" - versioned "github.com/kubeflow/training-operator/test_job/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/training-operator/test_job/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/training-operator/test_job/client/listers/test_job/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - watch "k8s.io/apimachinery/pkg/watch" - cache "k8s.io/client-go/tools/cache" -) - -// TestJobInformer provides access to a shared informer and lister for -// TestJobs. -type TestJobInformer interface { - Informer() cache.SharedIndexInformer - Lister() v1.TestJobLister -} - -type testJobInformer struct { - factory internalinterfaces.SharedInformerFactory - tweakListOptions internalinterfaces.TweakListOptionsFunc - namespace string -} - -// NewTestJobInformer constructs a new informer for TestJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. -func NewTestJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { - return NewFilteredTestJobInformer(client, namespace, resyncPeriod, indexers, nil) -} - -// NewFilteredTestJobInformer constructs a new informer for TestJob type. -// Always prefer using an informer factory to get a shared informer instead of getting an independent -// one. This reduces memory footprint and number of connections to the server. 
-func NewFilteredTestJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { - return cache.NewSharedIndexInformer( - &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().TestJobs(namespace).List(context.TODO(), options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - if tweakListOptions != nil { - tweakListOptions(&options) - } - return client.KubeflowV1().TestJobs(namespace).Watch(context.TODO(), options) - }, - }, - &testjobv1.TestJob{}, - resyncPeriod, - indexers, - ) -} - -func (f *testJobInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { - return NewFilteredTestJobInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) -} - -func (f *testJobInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&testjobv1.TestJob{}, f.defaultInformer) -} - -func (f *testJobInformer) Lister() v1.TestJobLister { - return v1.NewTestJobLister(f.Informer().GetIndexer()) -} diff --git a/test_job/client/listers/test_job/v1/expansion_generated.go b/test_job/client/listers/test_job/v1/expansion_generated.go deleted file mode 100644 index 40672a34c6..0000000000 --- a/test_job/client/listers/test_job/v1/expansion_generated.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by lister-gen. DO NOT EDIT. - -package v1 - -// TestJobListerExpansion allows custom methods to be added to -// TestJobLister. -type TestJobListerExpansion interface{} - -// TestJobNamespaceListerExpansion allows custom methods to be added to -// TestJobNamespaceLister. -type TestJobNamespaceListerExpansion interface{} diff --git a/test_job/client/listers/test_job/v1/testjob.go b/test_job/client/listers/test_job/v1/testjob.go deleted file mode 100644 index d0018c3c4a..0000000000 --- a/test_job/client/listers/test_job/v1/testjob.go +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2023 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by lister-gen. DO NOT EDIT. - -package v1 - -import ( - v1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/tools/cache" -) - -// TestJobLister helps list TestJobs. -// All objects returned here must be treated as read-only. 
-type TestJobLister interface { - // List lists all TestJobs in the indexer. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.TestJob, err error) - // TestJobs returns an object that can list and get TestJobs. - TestJobs(namespace string) TestJobNamespaceLister - TestJobListerExpansion -} - -// testJobLister implements the TestJobLister interface. -type testJobLister struct { - indexer cache.Indexer -} - -// NewTestJobLister returns a new TestJobLister. -func NewTestJobLister(indexer cache.Indexer) TestJobLister { - return &testJobLister{indexer: indexer} -} - -// List lists all TestJobs in the indexer. -func (s *testJobLister) List(selector labels.Selector) (ret []*v1.TestJob, err error) { - err = cache.ListAll(s.indexer, selector, func(m interface{}) { - ret = append(ret, m.(*v1.TestJob)) - }) - return ret, err -} - -// TestJobs returns an object that can list and get TestJobs. -func (s *testJobLister) TestJobs(namespace string) TestJobNamespaceLister { - return testJobNamespaceLister{indexer: s.indexer, namespace: namespace} -} - -// TestJobNamespaceLister helps list and get TestJobs. -// All objects returned here must be treated as read-only. -type TestJobNamespaceLister interface { - // List lists all TestJobs in the indexer for a given namespace. - // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1.TestJob, err error) - // Get retrieves the TestJob from the indexer for a given namespace and name. - // Objects returned here must be treated as read-only. - Get(name string) (*v1.TestJob, error) - TestJobNamespaceListerExpansion -} - -// testJobNamespaceLister implements the TestJobNamespaceLister -// interface. -type testJobNamespaceLister struct { - indexer cache.Indexer - namespace string -} - -// List lists all TestJobs in the indexer for a given namespace. 
-func (s testJobNamespaceLister) List(selector labels.Selector) (ret []*v1.TestJob, err error) { - err = cache.ListAllByNamespace(s.indexer, s.namespace, selector, func(m interface{}) { - ret = append(ret, m.(*v1.TestJob)) - }) - return ret, err -} - -// Get retrieves the TestJob from the indexer for a given namespace and name. -func (s testJobNamespaceLister) Get(name string) (*v1.TestJob, error) { - obj, exists, err := s.indexer.GetByKey(s.namespace + "/" + name) - if err != nil { - return nil, err - } - if !exists { - return nil, errors.NewNotFound(v1.Resource("testjob"), name) - } - return obj.(*v1.TestJob), nil -} diff --git a/test_job/test_util/v1/const.go b/test_job/test_util/v1/const.go deleted file mode 100644 index 2a1a36b928..0000000000 --- a/test_job/test_util/v1/const.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package v1 - -import ( - "time" -) - -const ( - TestImageName = "test-image-for-kubeflow-common:latest" - TestJobName = "test-job" - LabelWorker = "worker" - - SleepInterval = 500 * time.Millisecond - ThreadCount = 1 -) - -var ( - AlwaysReady = func() bool { return true } -) diff --git a/test_job/test_util/v1/pod.go b/test_job/test_util/v1/pod.go deleted file mode 100644 index 0ed88fd210..0000000000 --- a/test_job/test_util/v1/pod.go +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - "fmt" - "testing" - - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/cache" - - testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" -) - -const ( - // labels for pods and servers. 
- testReplicaTypeLabel = "test-replica-type" - testReplicaIndexLabel = "test-replica-index" -) - -var ( - controllerKind = testjobv1.SchemeGroupVersionKind -) - -func NewBasePod(name string, testJob *testjobv1.TestJob, t *testing.T) *v1.Pod { - return &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Labels: GenLabels(testJob.Name), - Namespace: testJob.Namespace, - OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(testJob, controllerKind)}, - }, - } -} - -func NewPod(testJob *testjobv1.TestJob, typ string, index int, t *testing.T) *v1.Pod { - pod := NewBasePod(fmt.Sprintf("%s-%d", typ, index), testJob, t) - pod.Labels[testReplicaTypeLabel] = typ - pod.Labels[testReplicaIndexLabel] = fmt.Sprintf("%d", index) - return pod -} - -// create count pods with the given phase for the given testjob -func NewPodList(count int32, status v1.PodPhase, testJob *testjobv1.TestJob, typ string, start int32, t *testing.T) []*v1.Pod { - pods := []*v1.Pod{} - for i := int32(0); i < count; i++ { - newPod := NewPod(testJob, typ, int(start+i), t) - newPod.Status = v1.PodStatus{Phase: status} - pods = append(pods, newPod) - } - return pods -} - -func SetPodsStatuses(podIndexer cache.Indexer, testJob *testjobv1.TestJob, typ string, pendingPods, activePods, succeededPods, failedPods int32, restartCounts []int32, t *testing.T) { - var index int32 - for _, pod := range NewPodList(pendingPods, v1.PodPending, testJob, typ, index, t) { - if err := podIndexer.Add(pod); err != nil { - t.Errorf("%s: unexpected error when adding pod %v", testJob.Name, err) - } - } - index += pendingPods - for i, pod := range NewPodList(activePods, v1.PodRunning, testJob, typ, index, t) { - if restartCounts != nil { - pod.Status.ContainerStatuses = []v1.ContainerStatus{{RestartCount: restartCounts[i]}} - } - if err := podIndexer.Add(pod); err != nil { - t.Errorf("%s: unexpected error when adding pod %v", testJob.Name, err) - } - } - index += activePods - for _, pod := range 
NewPodList(succeededPods, v1.PodSucceeded, testJob, typ, index, t) { - if err := podIndexer.Add(pod); err != nil { - t.Errorf("%s: unexpected error when adding pod %v", testJob.Name, err) - } - } - index += succeededPods - for _, pod := range NewPodList(failedPods, v1.PodFailed, testJob, typ, index, t) { - if err := podIndexer.Add(pod); err != nil { - t.Errorf("%s: unexpected error when adding pod %v", testJob.Name, err) - } - } -} diff --git a/test_job/test_util/v1/service.go b/test_job/test_util/v1/service.go deleted file mode 100644 index cc889a4e60..0000000000 --- a/test_job/test_util/v1/service.go +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package v1 - -import ( - "fmt" - "testing" - - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/cache" - - testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" -) - -func NewBaseService(name string, testJob *testjobv1.TestJob, t *testing.T) *v1.Service { - return &v1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Labels: GenLabels(testJob.Name), - Namespace: testJob.Namespace, - OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(testJob, controllerKind)}, - }, - } -} - -func NewService(testJob *testjobv1.TestJob, typ string, index int, t *testing.T) *v1.Service { - service := NewBaseService(fmt.Sprintf("%s-%d", typ, index), testJob, t) - service.Labels[testReplicaTypeLabel] = typ - service.Labels[testReplicaIndexLabel] = fmt.Sprintf("%d", index) - return service -} - -// NewServiceList creates count pods with the given phase for the given Job -func NewServiceList(count int32, testJob *testjobv1.TestJob, typ string, t *testing.T) []*v1.Service { - services := []*v1.Service{} - for i := int32(0); i < count; i++ { - newService := NewService(testJob, typ, int(i), t) - services = append(services, newService) - } - return services -} - -func SetServices(serviceIndexer cache.Indexer, testJob *testjobv1.TestJob, typ string, activeWorkerServices int32, t *testing.T) { - for _, service := range NewServiceList(activeWorkerServices, testJob, typ, t) { - if err := serviceIndexer.Add(service); err != nil { - t.Errorf("unexpected error when adding service %v", err) - } - } -} diff --git a/test_job/test_util/v1/test_job_util.go b/test_job/test_util/v1/test_job_util.go deleted file mode 100644 index ffa90d54ee..0000000000 --- a/test_job/test_util/v1/test_job_util.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - "time" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" -) - -func NewTestJob(worker int) *testjobv1.TestJob { - testJob := &testjobv1.TestJob{ - TypeMeta: metav1.TypeMeta{ - Kind: testjobv1.Kind, - }, - ObjectMeta: metav1.ObjectMeta{ - Name: TestJobName, - Namespace: metav1.NamespaceDefault, - }, - Spec: testjobv1.TestJobSpec{ - TestReplicaSpecs: make(map[testjobv1.TestReplicaType]*apiv1.ReplicaSpec), - }, - } - - if worker > 0 { - worker := int32(worker) - workerReplicaSpec := &apiv1.ReplicaSpec{ - Replicas: &worker, - Template: NewTestReplicaSpecTemplate(), - } - testJob.Spec.TestReplicaSpecs[testjobv1.TestReplicaTypeWorker] = workerReplicaSpec - } - - return testJob -} - -func NewTestReplicaSpecTemplate() v1.PodTemplateSpec { - return v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - v1.Container{ - Name: testjobv1.DefaultContainerName, - Image: TestImageName, - Args: []string{"Fake", "Fake"}, - Ports: []v1.ContainerPort{ - v1.ContainerPort{ - Name: testjobv1.DefaultPortName, - ContainerPort: testjobv1.DefaultPort, - }, - }, - }, - }, - }, - } -} - -func SetTestJobCompletionTime(testJob *testjobv1.TestJob) { - now := metav1.Time{Time: time.Now()} - testJob.Status.CompletionTime = &now -} diff --git a/test_job/test_util/v1/util.go b/test_job/test_util/v1/util.go deleted file mode 100644 index 775219427a..0000000000 --- 
a/test_job/test_util/v1/util.go +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2018 The Kubeflow Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1 - -import ( - "strings" - "testing" - - apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/cache" - - testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" -) - -const ( - LabelGroupName = "group-name" - LabelTestJobName = "test-job-name" -) - -var ( - // KeyFunc is the short name to DeletionHandlingMetaNamespaceKeyFunc. - // IndexerInformer uses a delta queue, therefore for deletes we have to use this - // key function but it should be just fine for non delete events. 
- KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc - TestGroupName = testjobv1.GroupName -) - -func GenLabels(jobName string) map[string]string { - return map[string]string{ - LabelGroupName: TestGroupName, - LabelTestJobName: strings.Replace(jobName, "/", "-", -1), - } -} - -func GenOwnerReference(testjob *testjobv1.TestJob) *metav1.OwnerReference { - boolPtr := func(b bool) *bool { return &b } - controllerRef := &metav1.OwnerReference{ - APIVersion: testjobv1.SchemeGroupVersion.String(), - Kind: testjobv1.Kind, - Name: testjob.Name, - UID: testjob.UID, - BlockOwnerDeletion: boolPtr(true), - Controller: boolPtr(true), - } - - return controllerRef -} - -func GetKey(testJob *testjobv1.TestJob, t *testing.T) string { - key, err := KeyFunc(testJob) - if err != nil { - t.Errorf("Unexpected error getting key for job %v: %v", testJob.Name, err) - return "" - } - return key -} - -func CheckCondition(testJob *testjobv1.TestJob, condition apiv1.JobConditionType, reason string) bool { - for _, v := range testJob.Status.Conditions { - if v.Type == condition && v.Status == v1.ConditionTrue && v.Reason == reason { - return true - } - } - return false -} diff --git a/third_party/library/license.txt b/third_party/library/license.txt deleted file mode 100644 index 865d93aab1..0000000000 --- a/third_party/library/license.txt +++ /dev/null @@ -1,7039 +0,0 @@ --------------------------------------------------------------------------------- -kubeflow/training-operator Apache License 2.0 https://github.com/kubeflow/training-operator/blob/master/LICENSE --------------------------------------------------------------------------------- - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -GoogleCloudPlatform/gcloud-golang Apache License 2.0 https://github.com/googleapis/google-cloud-go/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -PuerkitoBio/purell BSD 3-Clause "New" or "Revised" License https://github.com/PuerkitoBio/purell/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2012, Martin Angers -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- -* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -PuerkitoBio/urlesc BSD 3-Clause "New" or "Revised" License https://github.com/PuerkitoBio/urlesc/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2012 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -davecgh/go-spew ISC License https://github.com/davecgh/go-spew/blob/master/LICENSE --------------------------------------------------------------------------------- -ISC License - -Copyright (c) 2012-2016 Dave Collins - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- --------------------------------------------------------------------------------- -docker/distribution Apache License 2.0 https://github.com/docker/distribution/blob/master/LICENSE --------------------------------------------------------------------------------- -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- - --------------------------------------------------------------------------------- -emicklei/go-restful MIT License https://github.com/emicklei/go-restful/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2012,2013 Ernest Micklei - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------- -ghodss/yaml MIT License https://github.com/ghodss/yaml/blob/master/LICENSE --------------------------------------------------------------------------------- -The MIT License (MIT) - -Copyright (c) 2014 Sam Ghods - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - -Copyright (c) 2012 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. 
nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -go-openapi/jsonpointer Apache License 2.0 https://github.com/go-openapi/jsonpointer/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -go-openapi/jsonreference Apache License 2.0 https://github.com/go-openapi/jsonreference/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -go-openapi/spec Apache License 2.0 https://github.com/go-openapi/spec/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -go-openapi/swag Apache License 2.0 https://github.com/go-openapi/swag/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -gogo/protobuf BSD 3-Clause "New" or "Revised" License https://github.com/gogo/protobuf/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2013, The GoGo Authors. All rights reserved. - -Protocol Buffers for Go with Gadgets - -Go support for Protocol Buffers - Google's data interchange format - -Copyright 2010 The Go Authors. All rights reserved. -https://github.com/golang/protobuf - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - --------------------------------------------------------------------------------- -golang/glog Apache License 2.0 https://github.com/golang/glog/blob/master/LICENSE --------------------------------------------------------------------------------- -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, and -distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by the copyright -owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all other entities -that control, are controlled by, or are under common control with that entity. -For the purposes of this definition, "control" means (i) the power, direct or -indirect, to cause the direction or management of such entity, whether by -contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. 
- -"You" (or "Your") shall mean an individual or Legal Entity exercising -permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, including -but not limited to software source code, documentation source, and configuration -files. - -"Object" form shall mean any form resulting from mechanical transformation or -translation of a Source form, including but not limited to compiled object code, -generated documentation, and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or Object form, made -available under the License, as indicated by a copyright notice that is included -in or attached to the work (an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object form, that -is based on (or derived from) the Work and for which the editorial revisions, -annotations, elaborations, or other modifications represent, as a whole, an -original work of authorship. For the purposes of this License, Derivative Works -shall not include works that remain separable from, or merely link (or bind by -name) to the interfaces of, the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including the original version -of the Work and any modifications or additions to that Work or Derivative Works -thereof, that is intentionally submitted to Licensor for inclusion in the Work -by the copyright owner or by an individual or Legal Entity authorized to submit -on behalf of the copyright owner. 
For the purposes of this definition, -"submitted" means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, and -issue tracking systems that are managed by, or on behalf of, the Licensor for -the purpose of discussing and improving the Work, but excluding communication -that is conspicuously marked or otherwise designated in writing by the copyright -owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity on behalf -of whom a Contribution has been received by Licensor and subsequently -incorporated within the Work. - -2. Grant of Copyright License. - -Subject to the terms and conditions of this License, each Contributor hereby -grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, -irrevocable copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the Work and such -Derivative Works in Source or Object form. - -3. Grant of Patent License. - -Subject to the terms and conditions of this License, each Contributor hereby -grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, -irrevocable (except as stated in this section) patent license to make, have -made, use, offer to sell, sell, import, and otherwise transfer the Work, where -such license applies only to those patent claims licensable by such Contributor -that are necessarily infringed by their Contribution(s) alone or by combination -of their Contribution(s) with the Work to which such Contribution(s) was -submitted. 
If You institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work or a -Contribution incorporated within the Work constitutes direct or contributory -patent infringement, then any patent licenses granted to You under this License -for that Work shall terminate as of the date such litigation is filed. - -4. Redistribution. - -You may reproduce and distribute copies of the Work or Derivative Works thereof -in any medium, with or without modifications, and in Source or Object form, -provided that You meet the following conditions: - -You must give any other recipients of the Work or Derivative Works a copy of -this License; and -You must cause any modified files to carry prominent notices stating that You -changed the files; and -You must retain, in the Source form of any Derivative Works that You distribute, -all copyright, patent, trademark, and attribution notices from the Source form -of the Work, excluding those notices that do not pertain to any part of the -Derivative Works; and -If the Work includes a "NOTICE" text file as part of its distribution, then any -Derivative Works that You distribute must include a readable copy of the -attribution notices contained within such NOTICE file, excluding those notices -that do not pertain to any part of the Derivative Works, in at least one of the -following places: within a NOTICE text file distributed as part of the -Derivative Works; within the Source form or documentation, if provided along -with the Derivative Works; or, within a display generated by the Derivative -Works, if and wherever such third-party notices normally appear. The contents of -the NOTICE file are for informational purposes only and do not modify the -License. 
You may add Your own attribution notices within Derivative Works that -You distribute, alongside or as an addendum to the NOTICE text from the Work, -provided that such additional attribution notices cannot be construed as -modifying the License. -You may add Your own copyright statement to Your modifications and may provide -additional or different license terms and conditions for use, reproduction, or -distribution of Your modifications, or for any such Derivative Works as a whole, -provided Your use, reproduction, and distribution of the Work otherwise complies -with the conditions stated in this License. - -5. Submission of Contributions. - -Unless You explicitly state otherwise, any Contribution intentionally submitted -for inclusion in the Work by You to the Licensor shall be under the terms and -conditions of this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify the terms of -any separate license agreement you may have executed with Licensor regarding -such Contributions. - -6. Trademarks. - -This License does not grant permission to use the trade names, trademarks, -service marks, or product names of the Licensor, except as required for -reasonable and customary use in describing the origin of the Work and -reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. - -Unless required by applicable law or agreed to in writing, Licensor provides the -Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, -including, without limitation, any warranties or conditions of TITLE, -NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are -solely responsible for determining the appropriateness of using or -redistributing the Work and assume any risks associated with Your exercise of -permissions under this License. - -8. Limitation of Liability. 
- -In no event and under no legal theory, whether in tort (including negligence), -contract, or otherwise, unless required by applicable law (such as deliberate -and grossly negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, incidental, -or consequential damages of any character arising as a result of this License or -out of the use or inability to use the Work (including but not limited to -damages for loss of goodwill, work stoppage, computer failure or malfunction, or -any and all other commercial damages or losses), even if such Contributor has -been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. - -While redistributing the Work or Derivative Works thereof, You may choose to -offer, and charge a fee for, acceptance of support, warranty, indemnity, or -other liability obligations and/or rights consistent with this License. However, -in accepting such obligations, You may act only on Your own behalf and on Your -sole responsibility, not on behalf of any other Contributor, and only if You -agree to indemnify, defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason of your -accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work - -To apply the Apache License to your work, attach the following boilerplate -notice, with the fields enclosed by brackets "[]" replaced with your own -identifying information. (Don't include the brackets!) The text should be -enclosed in the appropriate comment syntax for the file format. We also -recommend that a file or class name and description of purpose be included on -the same "printed page" as the copyright notice for easier identification within -third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -golang/groupcache Apache License 2.0 https://github.com/golang/groupcache/blob/master/LICENSE --------------------------------------------------------------------------------- -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, and -distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by the copyright -owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all other entities -that control, are controlled by, or are under common control with that entity. -For the purposes of this definition, "control" means (i) the power, direct or -indirect, to cause the direction or management of such entity, whether by -contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity exercising -permissions granted by this License. 
- -"Source" form shall mean the preferred form for making modifications, including -but not limited to software source code, documentation source, and configuration -files. - -"Object" form shall mean any form resulting from mechanical transformation or -translation of a Source form, including but not limited to compiled object code, -generated documentation, and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or Object form, made -available under the License, as indicated by a copyright notice that is included -in or attached to the work (an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object form, that -is based on (or derived from) the Work and for which the editorial revisions, -annotations, elaborations, or other modifications represent, as a whole, an -original work of authorship. For the purposes of this License, Derivative Works -shall not include works that remain separable from, or merely link (or bind by -name) to the interfaces of, the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including the original version -of the Work and any modifications or additions to that Work or Derivative Works -thereof, that is intentionally submitted to Licensor for inclusion in the Work -by the copyright owner or by an individual or Legal Entity authorized to submit -on behalf of the copyright owner. 
For the purposes of this definition, -"submitted" means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, and -issue tracking systems that are managed by, or on behalf of, the Licensor for -the purpose of discussing and improving the Work, but excluding communication -that is conspicuously marked or otherwise designated in writing by the copyright -owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity on behalf -of whom a Contribution has been received by Licensor and subsequently -incorporated within the Work. - -2. Grant of Copyright License. - -Subject to the terms and conditions of this License, each Contributor hereby -grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, -irrevocable copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the Work and such -Derivative Works in Source or Object form. - -3. Grant of Patent License. - -Subject to the terms and conditions of this License, each Contributor hereby -grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, -irrevocable (except as stated in this section) patent license to make, have -made, use, offer to sell, sell, import, and otherwise transfer the Work, where -such license applies only to those patent claims licensable by such Contributor -that are necessarily infringed by their Contribution(s) alone or by combination -of their Contribution(s) with the Work to which such Contribution(s) was -submitted. 
If You institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work or a -Contribution incorporated within the Work constitutes direct or contributory -patent infringement, then any patent licenses granted to You under this License -for that Work shall terminate as of the date such litigation is filed. - -4. Redistribution. - -You may reproduce and distribute copies of the Work or Derivative Works thereof -in any medium, with or without modifications, and in Source or Object form, -provided that You meet the following conditions: - -You must give any other recipients of the Work or Derivative Works a copy of -this License; and -You must cause any modified files to carry prominent notices stating that You -changed the files; and -You must retain, in the Source form of any Derivative Works that You distribute, -all copyright, patent, trademark, and attribution notices from the Source form -of the Work, excluding those notices that do not pertain to any part of the -Derivative Works; and -If the Work includes a "NOTICE" text file as part of its distribution, then any -Derivative Works that You distribute must include a readable copy of the -attribution notices contained within such NOTICE file, excluding those notices -that do not pertain to any part of the Derivative Works, in at least one of the -following places: within a NOTICE text file distributed as part of the -Derivative Works; within the Source form or documentation, if provided along -with the Derivative Works; or, within a display generated by the Derivative -Works, if and wherever such third-party notices normally appear. The contents of -the NOTICE file are for informational purposes only and do not modify the -License. 
You may add Your own attribution notices within Derivative Works that -You distribute, alongside or as an addendum to the NOTICE text from the Work, -provided that such additional attribution notices cannot be construed as -modifying the License. -You may add Your own copyright statement to Your modifications and may provide -additional or different license terms and conditions for use, reproduction, or -distribution of Your modifications, or for any such Derivative Works as a whole, -provided Your use, reproduction, and distribution of the Work otherwise complies -with the conditions stated in this License. - -5. Submission of Contributions. - -Unless You explicitly state otherwise, any Contribution intentionally submitted -for inclusion in the Work by You to the Licensor shall be under the terms and -conditions of this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify the terms of -any separate license agreement you may have executed with Licensor regarding -such Contributions. - -6. Trademarks. - -This License does not grant permission to use the trade names, trademarks, -service marks, or product names of the Licensor, except as required for -reasonable and customary use in describing the origin of the Work and -reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. - -Unless required by applicable law or agreed to in writing, Licensor provides the -Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, -including, without limitation, any warranties or conditions of TITLE, -NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are -solely responsible for determining the appropriateness of using or -redistributing the Work and assume any risks associated with Your exercise of -permissions under this License. - -8. Limitation of Liability. 
- -In no event and under no legal theory, whether in tort (including negligence), -contract, or otherwise, unless required by applicable law (such as deliberate -and grossly negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, incidental, -or consequential damages of any character arising as a result of this License or -out of the use or inability to use the Work (including but not limited to -damages for loss of goodwill, work stoppage, computer failure or malfunction, or -any and all other commercial damages or losses), even if such Contributor has -been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. - -While redistributing the Work or Derivative Works thereof, You may choose to -offer, and charge a fee for, acceptance of support, warranty, indemnity, or -other liability obligations and/or rights consistent with this License. However, -in accepting such obligations, You may act only on Your own behalf and on Your -sole responsibility, not on behalf of any other Contributor, and only if You -agree to indemnify, defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason of your -accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work - -To apply the Apache License to your work, attach the following boilerplate -notice, with the fields enclosed by brackets "[]" replaced with your own -identifying information. (Don't include the brackets!) The text should be -enclosed in the appropriate comment syntax for the file format. We also -recommend that a file or class name and description of purpose be included on -the same "printed page" as the copyright notice for easier identification within -third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -golang/protobuf BSD 3-Clause "New" or "Revised" License https://github.com/golang/protobuf/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright 2010 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - --------------------------------------------------------------------------------- -google/btree Apache License 2.0 https://github.com/google/btree/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -google/gofuzz Apache License 2.0 https://github.com/google/gofuzz/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -googleapis/gnostic Apache License 2.0 https://github.com/googleapis/gnostic/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - --------------------------------------------------------------------------------- -gregjones/httpcache MIT License https://github.com/gregjones/httpcache/blob/master/LICENSE.txt --------------------------------------------------------------------------------- -Copyright © 2012 Greg Jones (greg.jones@gmail.com) - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------- -hashicorp/golang-lru Mozilla Public License 2.0 https://github.com/hashicorp/golang-lru/blob/master/LICENSE --------------------------------------------------------------------------------- -Mozilla Public License, version 2.0 - -1. Definitions - -1.1. "Contributor" - - means each individual or legal entity that creates, contributes to the - creation of, or owns Covered Software. - -1.2. "Contributor Version" - - means the combination of the Contributions of others (if any) used by a - Contributor and that particular Contributor's Contribution. - -1.3. "Contribution" - - means Covered Software of a particular Contributor. - -1.4. "Covered Software" - - means Source Code Form to which the initial Contributor has attached the - notice in Exhibit A, the Executable Form of such Source Code Form, and - Modifications of such Source Code Form, in each case including portions - thereof. - -1.5. "Incompatible With Secondary Licenses" - means - - a. that the initial Contributor has attached the notice described in - Exhibit B to the Covered Software; or - - b. that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the terms of - a Secondary License. - -1.6. "Executable Form" - - means any form of the work other than Source Code Form. - -1.7. "Larger Work" - - means a work that combines Covered Software with other material, in a - separate file or files, that is not Covered Software. - -1.8. "License" - - means this document. - -1.9. "Licensable" - - means having the right to grant, to the maximum extent possible, whether - at the time of the initial grant or subsequently, any and all of the - rights conveyed by this License. - -1.10. "Modifications" - - means any of the following: - - a. 
any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered Software; or - - b. any new file in Source Code Form that contains any Covered Software. - -1.11. "Patent Claims" of a Contributor - - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the License, - by the making, using, selling, offering for sale, having made, import, - or transfer of either its Contributions or its Contributor Version. - -1.12. "Secondary License" - - means either the GNU General Public License, Version 2.0, the GNU Lesser - General Public License, Version 2.1, the GNU Affero General Public - License, Version 3.0, or any later versions of those licenses. - -1.13. "Source Code Form" - - means the form of the work preferred for making modifications. - -1.14. "You" (or "Your") - - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that controls, is - controlled by, or is under common control with You. For purposes of this - definition, "control" means (a) the power, direct or indirect, to cause - the direction or management of such entity, whether by contract or - otherwise, or (b) ownership of more than fifty percent (50%) of the - outstanding shares or beneficial ownership of such entity. - - -2. License Grants and Conditions - -2.1. Grants - - Each Contributor hereby grants You a world-wide, royalty-free, - non-exclusive license: - - a. under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and - - b. 
under Patent Claims of such Contributor to make, use, sell, offer for - sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. - -2.2. Effective Date - - The licenses granted in Section 2.1 with respect to any Contribution - become effective for each Contribution on the date the Contributor first - distributes such Contribution. - -2.3. Limitations on Grant Scope - - The licenses granted in this Section 2 are the only rights granted under - this License. No additional rights or licenses will be implied from the - distribution or licensing of Covered Software under this License. - Notwithstanding Section 2.1(b) above, no patent license is granted by a - Contributor: - - a. for any code that a Contributor has removed from Covered Software; or - - b. for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - - c. under Patent Claims infringed by Covered Software in the absence of - its Contributions. - - This License does not grant any rights in the trademarks, service marks, - or logos of any Contributor (except as may be necessary to comply with - the notice requirements in Section 3.4). - -2.4. Subsequent Licenses - - No Contributor makes additional grants as a result of Your choice to - distribute the Covered Software under a subsequent version of this - License (see Section 10.2) or under the terms of a Secondary License (if - permitted under the terms of Section 3.3). - -2.5. Representation - - Each Contributor represents that the Contributor believes its - Contributions are its original creation(s) or it has sufficient rights to - grant the rights to its Contributions conveyed by this License. - -2.6. 
Fair Use - - This License is not intended to limit any rights You have under - applicable copyright doctrines of fair use, fair dealing, or other - equivalents. - -2.7. Conditions - - Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in - Section 2.1. - - -3. Responsibilities - -3.1. Distribution of Source Form - - All distribution of Covered Software in Source Code Form, including any - Modifications that You create or to which You contribute, must be under - the terms of this License. You must inform recipients that the Source - Code Form of the Covered Software is governed by the terms of this - License, and how they can obtain a copy of this License. You may not - attempt to alter or restrict the recipients' rights in the Source Code - Form. - -3.2. Distribution of Executable Form - - If You distribute Covered Software in Executable Form then: - - a. such Covered Software must also be made available in Source Code Form, - as described in Section 3.1, and You must inform recipients of the - Executable Form how they can obtain a copy of such Source Code Form by - reasonable means in a timely manner, at a charge no more than the cost - of distribution to the recipient; and - - b. You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter the - recipients' rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - - You may create and distribute a Larger Work under terms of Your choice, - provided that You also comply with the requirements of this License for - the Covered Software. 
If the Larger Work is a combination of Covered - Software with a work governed by one or more Secondary Licenses, and the - Covered Software is not Incompatible With Secondary Licenses, this - License permits You to additionally distribute such Covered Software - under the terms of such Secondary License(s), so that the recipient of - the Larger Work may, at their option, further distribute the Covered - Software under the terms of either this License or such Secondary - License(s). - -3.4. Notices - - You may not remove or alter the substance of any license notices - (including copyright notices, patent notices, disclaimers of warranty, or - limitations of liability) contained within the Source Code Form of the - Covered Software, except that You may alter any license notices to the - extent required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - - You may choose to offer, and to charge a fee for, warranty, support, - indemnity or liability obligations to one or more recipients of Covered - Software. However, You may do so only on Your own behalf, and not on - behalf of any Contributor. You must make it absolutely clear that any - such warranty, support, indemnity, or liability obligation is offered by - You alone, and You hereby agree to indemnify every Contributor for any - liability incurred by such Contributor as a result of warranty, support, - indemnity or liability terms You offer. You may include additional - disclaimers of warranty and limitations of liability specific to any - jurisdiction. - -4. Inability to Comply Due to Statute or Regulation - - If it is impossible for You to comply with any of the terms of this License - with respect to some or all of the Covered Software due to statute, - judicial order, or regulation then You must: (a) comply with the terms of - this License to the maximum extent possible; and (b) describe the - limitations and the code they affect. 
Such description must be placed in a - text file included with all distributions of the Covered Software under - this License. Except to the extent prohibited by statute or regulation, - such description must be sufficiently detailed for a recipient of ordinary - skill to be able to understand it. - -5. Termination - -5.1. The rights granted under this License will terminate automatically if You - fail to comply with any of its terms. However, if You become compliant, - then the rights granted under this License from a particular Contributor - are reinstated (a) provisionally, unless and until such Contributor - explicitly and finally terminates Your grants, and (b) on an ongoing - basis, if such Contributor fails to notify You of the non-compliance by - some reasonable means prior to 60 days after You have come back into - compliance. Moreover, Your grants from a particular Contributor are - reinstated on an ongoing basis if such Contributor notifies You of the - non-compliance by some reasonable means, this is the first time You have - received notice of non-compliance with this License from such - Contributor, and You become compliant prior to 30 days after Your receipt - of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent - infringement claim (excluding declaratory judgment actions, - counter-claims, and cross-claims) alleging that a Contributor Version - directly or indirectly infringes any patent, then the rights granted to - You by any and all Contributors for the Covered Software under Section - 2.1 of this License shall terminate. - -5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user - license agreements (excluding distributors and resellers) which have been - validly granted by You or Your distributors under this License prior to - termination shall survive termination. - -6. 
Disclaimer of Warranty - - Covered Software is provided under this License on an "as is" basis, - without warranty of any kind, either expressed, implied, or statutory, - including, without limitation, warranties that the Covered Software is free - of defects, merchantable, fit for a particular purpose or non-infringing. - The entire risk as to the quality and performance of the Covered Software - is with You. Should any Covered Software prove defective in any respect, - You (not any Contributor) assume the cost of any necessary servicing, - repair, or correction. This disclaimer of warranty constitutes an essential - part of this License. No use of any Covered Software is authorized under - this License except under this disclaimer. - -7. Limitation of Liability - - Under no circumstances and under no legal theory, whether tort (including - negligence), contract, or otherwise, shall any Contributor, or anyone who - distributes Covered Software as permitted above, be liable to You for any - direct, indirect, special, incidental, or consequential damages of any - character including, without limitation, damages for lost profits, loss of - goodwill, work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses, even if such party shall have been - informed of the possibility of such damages. This limitation of liability - shall not apply to liability for death or personal injury resulting from - such party's negligence to the extent applicable law prohibits such - limitation. Some jurisdictions do not allow the exclusion or limitation of - incidental or consequential damages, so this exclusion and limitation may - not apply to You. - -8. Litigation - - Any litigation relating to this License may be brought only in the courts - of a jurisdiction where the defendant maintains its principal place of - business and such litigation shall be governed by laws of that - jurisdiction, without reference to its conflict-of-law provisions. 
Nothing - in this Section shall prevent a party's ability to bring cross-claims or - counter-claims. - -9. Miscellaneous - - This License represents the complete agreement concerning the subject - matter hereof. If any provision of this License is held to be - unenforceable, such provision shall be reformed only to the extent - necessary to make it enforceable. Any law or regulation which provides that - the language of a contract shall be construed against the drafter shall not - be used to construe this License against a Contributor. - - -10. Versions of the License - -10.1. New Versions - - Mozilla Foundation is the license steward. Except as provided in Section - 10.3, no one other than the license steward has the right to modify or - publish new versions of this License. Each version will be given a - distinguishing version number. - -10.2. Effect of New Versions - - You may distribute the Covered Software under the terms of the version - of the License under which You originally received the Covered Software, - or under the terms of any subsequent version published by the license - steward. - -10.3. Modified Versions - - If you create software not governed by this License, and you want to - create a new license for such software, you may create and use a - modified version of this License if you rename the license and remove - any references to the name of the license steward (except to note that - such modified license differs from this License). - -10.4. Distributing Source Code Form that is Incompatible With Secondary - Licenses If You choose to distribute Source Code Form that is - Incompatible With Secondary Licenses under the terms of this version of - the License, the notice described in Exhibit B of this License must be - attached. - -Exhibit A - Source Code Form License Notice - - This Source Code Form is subject to the - terms of the Mozilla Public License, v. - 2.0. 
If a copy of the MPL was not - distributed with this file, You can - obtain one at - http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular file, -then You may include the notice in a location (such as a LICENSE file in a -relevant directory) where a recipient would be likely to look for such a -notice. - -You may add additional accurate notices of copyright ownership. - -Exhibit B - "Incompatible With Secondary Licenses" Notice - - This Source Code Form is "Incompatible - With Secondary Licenses", as defined by - the Mozilla Public License, v. 2.0. - --------------------------------------------------------------------------------- -imdario/mergo BSD 3-Clause "New" or "Revised" License https://github.com/imdario/mergo/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2013 Dario Castañé. All rights reserved. -Copyright (c) 2012 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -json-iterator/go MIT License https://github.com/json-iterator/go/blob/master/LICENSE --------------------------------------------------------------------------------- -MIT License - -Copyright (c) 2016 json-iterator - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- --------------------------------------------------------------------------------- -konsorten/go-windows-terminal-sequences MIT License https://github.com/konsorten/go-windows-terminal-sequences/blob/master/LICENSE --------------------------------------------------------------------------------- -(The MIT License) - -Copyright (c) 2017 marvin + konsorten GmbH (open-source@konsorten.de) - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- -kubernetes-sigs/kube-batch Apache License 2.0 https://github.com/kubernetes-sigs/kube-batch/blob/master/LICENSE --------------------------------------------------------------------------------- - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- --------------------------------------------------------------------------------- -mailru/easyjson MIT License https://github.com/mailru/easyjson/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2016 Mail.Ru Group - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- -modern-go/concurrent Apache License 2.0 https://github.com/modern-go/concurrent/blob/master/LICENSE --------------------------------------------------------------------------------- - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -modern-go/reflect2 Apache License 2.0 https://github.com/modern-go/reflect2/blob/master/LICENSE --------------------------------------------------------------------------------- - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- --------------------------------------------------------------------------------- -onrik/logrus MIT License https://github.com/onrik/logrus/blob/master/LICENSE --------------------------------------------------------------------------------- -MIT License - -Copyright (c) 2016 Andrey - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - --------------------------------------------------------------------------------- -opencontainers/go-digest Apache License 2.0 https://github.com/opencontainers/go-digest/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - https://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - Copyright 2016 Docker, Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -petar/GoLLRB BSD 3-Clause "New" or "Revised" License https://github.com/petar/GoLLRB/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2010, Petar Maymounkov -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -(*) Redistributions of source code must retain the above copyright notice, this list -of conditions and the following disclaimer. - -(*) Redistributions in binary form must reproduce the above copyright notice, this -list of conditions and the following disclaimer in the documentation and/or -other materials provided with the distribution. - -(*) Neither the name of Petar Maymounkov nor the names of its contributors may be -used to endorse or promote products derived from this software without specific -prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -peterbourgon/diskv MIT License https://github.com/peterbourgon/diskv/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2011-2012 Peter Bourgon - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
- --------------------------------------------------------------------------------- -pmezard/go-difflib BSD 2-Clause "New" or "Revised" License https://github.com/pmezard/go-difflib/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2013, Patrick Mezard -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - The names of its contributors may not be used to endorse or promote -products derived from this software without specific prior written -permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- --------------------------------------------------------------------------------- -sirupsen/logrus MIT License https://github.com/sirupsen/logrus/blob/master/LICENSE --------------------------------------------------------------------------------- -The MIT License (MIT) - -Copyright (c) 2014 Simon Eskildsen - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - --------------------------------------------------------------------------------- -spf13/pflag BSD 3-Clause "New" or "Revised" License https://github.com/spf13/pflag/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2012 Alex Ogier. All rights reserved. -Copyright (c) 2012 The Go Authors. All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- --------------------------------------------------------------------------------- -stretchr/testify MIT License https://github.com/stretchr/testify/blob/master/LICENSE --------------------------------------------------------------------------------- -MIT License - -Copyright (c) 2012-2018 Mat Ryer and Tyler Bunnell - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - --------------------------------------------------------------------------------- -golang/crypto BSD 3-Clause "New" or "Revised" License https://github.com/golang/crypto/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -golang/net BSD 3-Clause "New" or "Revised" License https://github.com/golang/net/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. 
- * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -golang/oauth2 BSD 3-Clause "New" or "Revised" License https://github.com/golang/oauth2/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -golang/sys BSD 3-Clause "New" or "Revised" License https://github.com/golang/sys/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -golang/text BSD 3-Clause "New" or "Revised" License https://github.com/golang/text/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -golang/time BSD 3-Clause "New" or "Revised" License https://github.com/golang/time/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -golang/tools BSD 3-Clause "New" or "Revised" License https://github.com/golang/tools/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -golang/appengine Apache License 2.0 https://github.com/golang/appengine/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -go-inf/inf BSD 3-Clause "New" or "Revised" License https://github.com/go-inf/inf/blob/master/LICENSE --------------------------------------------------------------------------------- -Copyright (c) 2012 Péter Surányi. Portions Copyright (c) 2009 The Go -Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- -square/go-jose Apache License 2.0 https://github.com/square/go-jose/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -go-yaml/yaml Apache License 2.0 https://github.com/go-yaml/yaml/blob/v2/LICENSE --------------------------------------------------------------------------------- - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/api Apache License 2.0 https://github.com/kubernetes/api/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/apiextensions-apiserver Apache License 2.0 https://github.com/kubernetes/apiextensions-apiserver/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/apimachinery Apache License 2.0 https://github.com/kubernetes/apimachinery/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/apiserver Apache License 2.0 https://github.com/kubernetes/apiserver/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/client-go Apache License 2.0 https://github.com/kubernetes/client-go/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/code-generator Apache License 2.0 https://github.com/kubernetes/code-generator/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/gengo Apache License 2.0 https://github.com/kubernetes/gengo/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright 2014 The Kubernetes Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/klog Apache License 2.0 https://github.com/kubernetes/klog/blob/master/LICENSE --------------------------------------------------------------------------------- -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, and -distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by the copyright -owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all other entities -that control, are controlled by, or are under common control with that entity. -For the purposes of this definition, "control" means (i) the power, direct or -indirect, to cause the direction or management of such entity, whether by -contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity exercising -permissions granted by this License. 
- -"Source" form shall mean the preferred form for making modifications, including -but not limited to software source code, documentation source, and configuration -files. - -"Object" form shall mean any form resulting from mechanical transformation or -translation of a Source form, including but not limited to compiled object code, -generated documentation, and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or Object form, made -available under the License, as indicated by a copyright notice that is included -in or attached to the work (an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object form, that -is based on (or derived from) the Work and for which the editorial revisions, -annotations, elaborations, or other modifications represent, as a whole, an -original work of authorship. For the purposes of this License, Derivative Works -shall not include works that remain separable from, or merely link (or bind by -name) to the interfaces of, the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including the original version -of the Work and any modifications or additions to that Work or Derivative Works -thereof, that is intentionally submitted to Licensor for inclusion in the Work -by the copyright owner or by an individual or Legal Entity authorized to submit -on behalf of the copyright owner. 
For the purposes of this definition, -"submitted" means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, and -issue tracking systems that are managed by, or on behalf of, the Licensor for -the purpose of discussing and improving the Work, but excluding communication -that is conspicuously marked or otherwise designated in writing by the copyright -owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity on behalf -of whom a Contribution has been received by Licensor and subsequently -incorporated within the Work. - -2. Grant of Copyright License. - -Subject to the terms and conditions of this License, each Contributor hereby -grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, -irrevocable copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the Work and such -Derivative Works in Source or Object form. - -3. Grant of Patent License. - -Subject to the terms and conditions of this License, each Contributor hereby -grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, -irrevocable (except as stated in this section) patent license to make, have -made, use, offer to sell, sell, import, and otherwise transfer the Work, where -such license applies only to those patent claims licensable by such Contributor -that are necessarily infringed by their Contribution(s) alone or by combination -of their Contribution(s) with the Work to which such Contribution(s) was -submitted. 
If You institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work or a -Contribution incorporated within the Work constitutes direct or contributory -patent infringement, then any patent licenses granted to You under this License -for that Work shall terminate as of the date such litigation is filed. - -4. Redistribution. - -You may reproduce and distribute copies of the Work or Derivative Works thereof -in any medium, with or without modifications, and in Source or Object form, -provided that You meet the following conditions: - -You must give any other recipients of the Work or Derivative Works a copy of -this License; and -You must cause any modified files to carry prominent notices stating that You -changed the files; and -You must retain, in the Source form of any Derivative Works that You distribute, -all copyright, patent, trademark, and attribution notices from the Source form -of the Work, excluding those notices that do not pertain to any part of the -Derivative Works; and -If the Work includes a "NOTICE" text file as part of its distribution, then any -Derivative Works that You distribute must include a readable copy of the -attribution notices contained within such NOTICE file, excluding those notices -that do not pertain to any part of the Derivative Works, in at least one of the -following places: within a NOTICE text file distributed as part of the -Derivative Works; within the Source form or documentation, if provided along -with the Derivative Works; or, within a display generated by the Derivative -Works, if and wherever such third-party notices normally appear. The contents of -the NOTICE file are for informational purposes only and do not modify the -License. 
You may add Your own attribution notices within Derivative Works that -You distribute, alongside or as an addendum to the NOTICE text from the Work, -provided that such additional attribution notices cannot be construed as -modifying the License. -You may add Your own copyright statement to Your modifications and may provide -additional or different license terms and conditions for use, reproduction, or -distribution of Your modifications, or for any such Derivative Works as a whole, -provided Your use, reproduction, and distribution of the Work otherwise complies -with the conditions stated in this License. - -5. Submission of Contributions. - -Unless You explicitly state otherwise, any Contribution intentionally submitted -for inclusion in the Work by You to the Licensor shall be under the terms and -conditions of this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify the terms of -any separate license agreement you may have executed with Licensor regarding -such Contributions. - -6. Trademarks. - -This License does not grant permission to use the trade names, trademarks, -service marks, or product names of the Licensor, except as required for -reasonable and customary use in describing the origin of the Work and -reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. - -Unless required by applicable law or agreed to in writing, Licensor provides the -Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, -including, without limitation, any warranties or conditions of TITLE, -NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are -solely responsible for determining the appropriateness of using or -redistributing the Work and assume any risks associated with Your exercise of -permissions under this License. - -8. Limitation of Liability. 
- -In no event and under no legal theory, whether in tort (including negligence), -contract, or otherwise, unless required by applicable law (such as deliberate -and grossly negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, incidental, -or consequential damages of any character arising as a result of this License or -out of the use or inability to use the Work (including but not limited to -damages for loss of goodwill, work stoppage, computer failure or malfunction, or -any and all other commercial damages or losses), even if such Contributor has -been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. - -While redistributing the Work or Derivative Works thereof, You may choose to -offer, and charge a fee for, acceptance of support, warranty, indemnity, or -other liability obligations and/or rights consistent with this License. However, -in accepting such obligations, You may act only on Your own behalf and on Your -sole responsibility, not on behalf of any other Contributor, and only if You -agree to indemnify, defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason of your -accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work - -To apply the Apache License to your work, attach the following boilerplate -notice, with the fields enclosed by brackets "[]" replaced with your own -identifying information. (Don't include the brackets!) The text should be -enclosed in the appropriate comment syntax for the file format. We also -recommend that a file or class name and description of purpose be included on -the same "printed page" as the copyright notice for easier identification within -third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/kube-openapi Apache License 2.0 https://github.com/kubernetes/kube-openapi/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- -kubernetes/kubernetes Apache License 2.0 https://github.com/kubernetes/kubernetes/blob/master/LICENSE --------------------------------------------------------------------------------- - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/third_party_licenses/additional_license_info.csv b/third_party_licenses/additional_license_info.csv deleted file mode 100644 index f83258e2e6..0000000000 --- a/third_party_licenses/additional_license_info.csv +++ /dev/null @@ -1,14 +0,0 @@ -https://github.com/googleapis/google-cloud-go/blob/master/LICENSE,Apache License 2.0 -https://github.com/ghodss/yaml/blob/master/LICENSE,MIT License -https://github.com/go-openapi/spec/blob/master/LICENSE,Apache License 2.0 -https://github.com/gogo/protobuf/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License -https://github.com/opencontainers/go-digest/blob/master/LICENSE,Apache License 2.0 -https://github.com/pmezard/go-difflib/blob/master/LICENSE,BSD 2-Clause "New" or "Revised" License -https://github.com/golang/crypto/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License -https://github.com/golang/net/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License -https://github.com/golang/sys/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License -https://github.com/golang/text/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License -https://github.com/golang/time/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License -https://github.com/golang/tools/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License -https://github.com/go-inf/inf/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License 
-https://github.com/go-yaml/yaml/blob/v2/LICENSE,Apache License 2.0 diff --git a/third_party_licenses/dep.txt b/third_party_licenses/dep.txt deleted file mode 100644 index 7a9c790319..0000000000 --- a/third_party_licenses/dep.txt +++ /dev/null @@ -1,57 +0,0 @@ -kubeflow/training-operator -cloud.google.com/go -github.com/PuerkitoBio/purell -github.com/PuerkitoBio/urlesc -github.com/davecgh/go-spew -github.com/docker/distribution -github.com/emicklei/go-restful -github.com/ghodss/yaml -github.com/go-openapi/jsonpointer -github.com/go-openapi/jsonreference -github.com/go-openapi/spec -github.com/go-openapi/swag -github.com/gogo/protobuf -github.com/golang/glog -github.com/golang/groupcache -github.com/golang/protobuf -github.com/google/btree -github.com/google/gofuzz -github.com/googleapis/gnostic -github.com/gregjones/httpcache -github.com/hashicorp/golang-lru -github.com/imdario/mergo -github.com/json-iterator/go -github.com/konsorten/go-windows-terminal-sequences -github.com/kubernetes-sigs/kube-batch -github.com/mailru/easyjson -github.com/modern-go/concurrent -github.com/modern-go/reflect2 -github.com/onrik/logrus -github.com/opencontainers/go-digest -github.com/petar/GoLLRB -github.com/peterbourgon/diskv -github.com/pmezard/go-difflib -github.com/sirupsen/logrus -github.com/spf13/pflag -github.com/stretchr/testify -golang.org/x/crypto -golang.org/x/net -golang.org/x/oauth2 -golang.org/x/sys -golang.org/x/text -golang.org/x/time -golang.org/x/tools -google.golang.org/appengine -gopkg.in/inf.v0 -gopkg.in/square/go-jose.v2 -gopkg.in/yaml.v2 -k8s.io/api -k8s.io/apiextensions-apiserver -k8s.io/apimachinery -k8s.io/apiserver -k8s.io/client-go -k8s.io/code-generator -k8s.io/gengo -k8s.io/klog -k8s.io/kube-openapi -k8s.io/kubernetes diff --git a/third_party_licenses/dep_repo.manual.csv b/third_party_licenses/dep_repo.manual.csv deleted file mode 100644 index 090c2b7ebc..0000000000 --- a/third_party_licenses/dep_repo.manual.csv +++ /dev/null @@ -1 +0,0 @@ 
-kubeflow/training-operator,kubeflow/training-operator diff --git a/third_party_licenses/license_info.csv b/third_party_licenses/license_info.csv deleted file mode 100644 index 542dc47ce3..0000000000 --- a/third_party_licenses/license_info.csv +++ /dev/null @@ -1,57 +0,0 @@ -kubeflow/training-operator,https://github.com/kubeflow/training-operator/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubeflow/training-operator/master/LICENSE -GoogleCloudPlatform/gcloud-golang,https://github.com/googleapis/google-cloud-go/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/googleapis/google-cloud-go/master/LICENSE -PuerkitoBio/purell,https://github.com/PuerkitoBio/purell/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/PuerkitoBio/purell/master/LICENSE -PuerkitoBio/urlesc,https://github.com/PuerkitoBio/urlesc/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/PuerkitoBio/urlesc/master/LICENSE -davecgh/go-spew,https://github.com/davecgh/go-spew/blob/master/LICENSE,ISC License,https://raw.githubusercontent.com/davecgh/go-spew/master/LICENSE -docker/distribution,https://github.com/docker/distribution/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/docker/distribution/master/LICENSE -emicklei/go-restful,https://github.com/emicklei/go-restful/blob/master/LICENSE,MIT License,https://raw.githubusercontent.com/emicklei/go-restful/master/LICENSE -ghodss/yaml,https://github.com/ghodss/yaml/blob/master/LICENSE,MIT License,https://raw.githubusercontent.com/ghodss/yaml/master/LICENSE -go-openapi/jsonpointer,https://github.com/go-openapi/jsonpointer/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/go-openapi/jsonpointer/master/LICENSE -go-openapi/jsonreference,https://github.com/go-openapi/jsonreference/blob/master/LICENSE,Apache License 
2.0,https://raw.githubusercontent.com/go-openapi/jsonreference/master/LICENSE -go-openapi/spec,https://github.com/go-openapi/spec/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/go-openapi/spec/master/LICENSE -go-openapi/swag,https://github.com/go-openapi/swag/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/go-openapi/swag/master/LICENSE -gogo/protobuf,https://github.com/gogo/protobuf/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/gogo/protobuf/master/LICENSE -golang/glog,https://github.com/golang/glog/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/golang/glog/master/LICENSE -golang/groupcache,https://github.com/golang/groupcache/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/golang/groupcache/master/LICENSE -golang/protobuf,https://github.com/golang/protobuf/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/golang/protobuf/master/LICENSE -google/btree,https://github.com/google/btree/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/google/btree/master/LICENSE -google/gofuzz,https://github.com/google/gofuzz/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/google/gofuzz/master/LICENSE -googleapis/gnostic,https://github.com/googleapis/gnostic/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/googleapis/gnostic/master/LICENSE -gregjones/httpcache,https://github.com/gregjones/httpcache/blob/master/LICENSE.txt,MIT License,https://raw.githubusercontent.com/gregjones/httpcache/master/LICENSE.txt -hashicorp/golang-lru,https://github.com/hashicorp/golang-lru/blob/master/LICENSE,Mozilla Public License 2.0,https://raw.githubusercontent.com/hashicorp/golang-lru/master/LICENSE -imdario/mergo,https://github.com/imdario/mergo/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" 
License,https://raw.githubusercontent.com/imdario/mergo/master/LICENSE -json-iterator/go,https://github.com/json-iterator/go/blob/master/LICENSE,MIT License,https://raw.githubusercontent.com/json-iterator/go/master/LICENSE -konsorten/go-windows-terminal-sequences,https://github.com/konsorten/go-windows-terminal-sequences/blob/master/LICENSE,MIT License,https://raw.githubusercontent.com/konsorten/go-windows-terminal-sequences/master/LICENSE -kubernetes-sigs/kube-batch,https://github.com/kubernetes-sigs/kube-batch/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes-sigs/kube-batch/master/LICENSE -mailru/easyjson,https://github.com/mailru/easyjson/blob/master/LICENSE,MIT License,https://raw.githubusercontent.com/mailru/easyjson/master/LICENSE -modern-go/concurrent,https://github.com/modern-go/concurrent/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/modern-go/concurrent/master/LICENSE -modern-go/reflect2,https://github.com/modern-go/reflect2/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/modern-go/reflect2/master/LICENSE -onrik/logrus,https://github.com/onrik/logrus/blob/master/LICENSE,MIT License,https://raw.githubusercontent.com/onrik/logrus/master/LICENSE -opencontainers/go-digest,https://github.com/opencontainers/go-digest/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/opencontainers/go-digest/master/LICENSE -petar/GoLLRB,https://github.com/petar/GoLLRB/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/petar/GoLLRB/master/LICENSE -peterbourgon/diskv,https://github.com/peterbourgon/diskv/blob/master/LICENSE,MIT License,https://raw.githubusercontent.com/peterbourgon/diskv/master/LICENSE -pmezard/go-difflib,https://github.com/pmezard/go-difflib/blob/master/LICENSE,BSD 2-Clause "New" or "Revised" License,https://raw.githubusercontent.com/pmezard/go-difflib/master/LICENSE 
-sirupsen/logrus,https://github.com/sirupsen/logrus/blob/master/LICENSE,MIT License,https://raw.githubusercontent.com/sirupsen/logrus/master/LICENSE -spf13/pflag,https://github.com/spf13/pflag/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/spf13/pflag/master/LICENSE -stretchr/testify,https://github.com/stretchr/testify/blob/master/LICENSE,MIT License,https://raw.githubusercontent.com/stretchr/testify/master/LICENSE -golang/crypto,https://github.com/golang/crypto/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/golang/crypto/master/LICENSE -golang/net,https://github.com/golang/net/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/golang/net/master/LICENSE -golang/oauth2,https://github.com/golang/oauth2/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/golang/oauth2/master/LICENSE -golang/sys,https://github.com/golang/sys/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/golang/sys/master/LICENSE -golang/text,https://github.com/golang/text/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/golang/text/master/LICENSE -golang/time,https://github.com/golang/time/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/golang/time/master/LICENSE -golang/tools,https://github.com/golang/tools/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/golang/tools/master/LICENSE -golang/appengine,https://github.com/golang/appengine/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/golang/appengine/master/LICENSE -go-inf/inf,https://github.com/go-inf/inf/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/go-inf/inf/master/LICENSE 
-square/go-jose,https://github.com/square/go-jose/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/square/go-jose/master/LICENSE -go-yaml/yaml,https://github.com/go-yaml/yaml/blob/v2/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/go-yaml/yaml/v2/LICENSE -kubernetes/api,https://github.com/kubernetes/api/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes/api/master/LICENSE -kubernetes/apiextensions-apiserver,https://github.com/kubernetes/apiextensions-apiserver/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes/apiextensions-apiserver/master/LICENSE -kubernetes/apimachinery,https://github.com/kubernetes/apimachinery/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes/apimachinery/master/LICENSE -kubernetes/apiserver,https://github.com/kubernetes/apiserver/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes/apiserver/master/LICENSE -kubernetes/client-go,https://github.com/kubernetes/client-go/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes/client-go/master/LICENSE -kubernetes/code-generator,https://github.com/kubernetes/code-generator/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes/code-generator/master/LICENSE -kubernetes/gengo,https://github.com/kubernetes/gengo/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes/gengo/master/LICENSE -kubernetes/klog,https://github.com/kubernetes/klog/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes/klog/master/LICENSE -kubernetes/kube-openapi,https://github.com/kubernetes/kube-openapi/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubernetes/kube-openapi/master/LICENSE -kubernetes/kubernetes,https://github.com/kubernetes/kubernetes/blob/master/LICENSE,Apache License 
2.0,https://raw.githubusercontent.com/kubernetes/kubernetes/master/LICENSE diff --git a/third_party_licenses/repo.txt b/third_party_licenses/repo.txt deleted file mode 100644 index 9337aa1303..0000000000 --- a/third_party_licenses/repo.txt +++ /dev/null @@ -1,57 +0,0 @@ -kubeflow/training-operator -GoogleCloudPlatform/gcloud-golang -PuerkitoBio/purell -PuerkitoBio/urlesc -davecgh/go-spew -docker/distribution -emicklei/go-restful -ghodss/yaml -go-openapi/jsonpointer -go-openapi/jsonreference -go-openapi/spec -go-openapi/swag -gogo/protobuf -golang/glog -golang/groupcache -golang/protobuf -google/btree -google/gofuzz -googleapis/gnostic -gregjones/httpcache -hashicorp/golang-lru -imdario/mergo -json-iterator/go -konsorten/go-windows-terminal-sequences -kubernetes-sigs/kube-batch -mailru/easyjson -modern-go/concurrent -modern-go/reflect2 -onrik/logrus -opencontainers/go-digest -petar/GoLLRB -peterbourgon/diskv -pmezard/go-difflib -sirupsen/logrus -spf13/pflag -stretchr/testify -golang/crypto -golang/net -golang/oauth2 -golang/sys -golang/text -golang/time -golang/tools -golang/appengine -go-inf/inf -square/go-jose -go-yaml/yaml -kubernetes/api -kubernetes/apiextensions-apiserver -kubernetes/apimachinery -kubernetes/apiserver -kubernetes/client-go -kubernetes/code-generator -kubernetes/gengo -kubernetes/klog -kubernetes/kube-openapi -kubernetes/kubernetes diff --git a/vendor.go b/vendor.go deleted file mode 100644 index 1d17379018..0000000000 --- a/vendor.go +++ /dev/null @@ -1,13 +0,0 @@ -//go:build vendor - -package main - -// This file exists to trick "go mod vendor" to include "main" packages. -// It is not expected to build, the build tag above is only to prevent this -// file from being included in builds. 
- -import ( - _ "k8s.io/code-generator" -) - -func main() {} From 63c8386dff3bed56854afd6f95a6fd60cf25d0bf Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Thu, 16 Jan 2025 16:38:13 +0000 Subject: [PATCH 02/22] Refactor README and Makefile Signed-off-by: Andrey Velichkevich --- Makefile | 137 +++++++++++++++-------------------------- README.md | 100 ++++++++---------------------- generate | 1 + go.mod | 8 +-- go.sum | 6 -- hack/verify-codegen.sh | 30 +-------- 6 files changed, 79 insertions(+), 203 deletions(-) create mode 100644 generate diff --git a/Makefile b/Makefile index d031a45fb4..5e01696243 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,3 @@ -# Image URL to use all building/pushing image targets -IMG ?= kubeflow/training-operator:latest -# CRD generation options -CRD_OPTIONS ?= "crd:generateEmbeddedObjectMeta=true,maxDescLen=400" - # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) ifeq (,$(shell go env GOBIN)) GOBIN=$(shell go env GOPATH)/bin @@ -16,8 +11,6 @@ endif SHELL = /usr/bin/env bash -o pipefail .SHELLFLAGS = -ec -all: build - ##@ General # The help target prints out all targets with their descriptions organized @@ -36,6 +29,40 @@ help: ## Display this help. ##@ Development +PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST)))) + + +## Tool Binaries +LOCALBIN ?= $(PROJECT_DIR)/bin +CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen +ENVTEST ?= $(LOCALBIN)/setup-envtest + +ENVTEST_K8S_VERSION ?= 1.31 + +.PHONY: envtest +envtest: ## Download the setup-envtest binary if required. + test -s $(ENVTEST) || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@release-0.19 + +.PHONY: controller-gen +controller-gen: ## Download the controller-gen binary if required. + test -s $(CONTROLLER_GEN) || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.16.5 + +# Download external CRDs for the integration testings. 
+EXTERNAL_CRDS_DIR ?= $(PROJECT_DIR)/manifests/external-crds + +JOBSET_ROOT = $(shell go list -m -mod=readonly -f "{{.Dir}}" sigs.k8s.io/jobset) +.PHONY: jobset-operator-crd +jobset-operator-crd: ## Copy the CRDs from the JobSet repository to the manifests/external-crds directory. + mkdir -p $(EXTERNAL_CRDS_DIR)/jobset-operator/ + cp -f $(JOBSET_ROOT)/config/components/crd/bases/* $(EXTERNAL_CRDS_DIR)/jobset-operator/ + +SCHEDULER_PLUGINS_ROOT = $(shell go list -m -f "{{.Dir}}" sigs.k8s.io/scheduler-plugins) +.PHONY: scheduler-plugins-crd +scheduler-plugins-crd: ## Copy the CRDs from the Scheduler Plugins repository to the manifests/external-crds directory. + mkdir -p $(EXTERNAL_CRDS_DIR)/scheduler-plugins/ + cp -f $(SCHEDULER_PLUGINS_ROOT)/manifests/coscheduling/* $(EXTERNAL_CRDS_DIR)/scheduler-plugins + +.PHONY: manifests manifests: controller-gen ## Generate manifests. $(CONTROLLER_GEN) "crd:generateEmbeddedObjectMeta=true" rbac:roleName=training-operator-v2 webhook \ paths="./pkg/apis/kubeflow.org/v2alpha1/...;./pkg/controller.v2/...;./pkg/runtime.v2/...;./pkg/webhooks.v2/...;./pkg/cert/..." \ @@ -43,101 +70,37 @@ manifests: controller-gen ## Generate manifests. output:rbac:artifacts:config=manifests/v2/base/rbac \ output:webhook:artifacts:config=manifests/v2/base/webhook +.PHONY: generate generate: go-mod-download manifests ## Generate APIs and SDK. $(CONTROLLER_GEN) object:headerFile="hack/boilerplate/boilerplate.go.txt" paths="./pkg/apis/..." hack/update-codegen.sh hack/python-sdk-v2/gen-sdk.sh -fmt: ## Run go fmt against code. +.PHONY: go-mod-download +go-mod-download: ## Run go mod download to download modules. + go mod download + +.PHONY: fmt +fmt: ## Run go fmt against the code. go fmt ./... -vet: ## Run go vet against code. +.PHONY: vet +vet: ## Run go vet against the code. go vet ./... GOLANGCI_LINT=$(shell which golangci-lint) -golangci-lint: +.PHONY: golangci-lint +golangci-lint: ## Run golangci-lint to verify Go files. 
ifeq ($(GOLANGCI_LINT),) curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.61.0 $(info golangci-lint has been installed) endif golangci-lint run --timeout 5m --go 1.23 ./... -ENVTEST_K8S_VERSION ?= 1.31 -HAS_SETUP_ENVTEST := $(shell command -v setup-envtest;) - -testall: manifests generate fmt vet golangci-lint test ## Run tests. - -test: envtest - KUBEBUILDER_ASSETS="$(shell setup-envtest use $(ENVTEST_K8S_VERSION) -p path)" \ - go test ./pkg/apis/kubeflow.org/v1/... ./pkg/cert/... ./pkg/common/... ./pkg/config/... ./pkg/controller.v1/... ./pkg/core/... ./pkg/util/... ./pkg/webhooks/... -coverprofile cover.out - -.PHONY: test-integrationv2 -test-integrationv2: envtest jobset-operator-crd scheduler-plugins-crd - KUBEBUILDER_ASSETS="$(shell setup-envtest use $(ENVTEST_K8S_VERSION) -p path)" go test ./test/... -coverprofile cover.out - -.PHONY: testv2 -testv2: +.PHONY: test +test: ## Run Go unit test. go test ./pkg/apis/kubeflow.org/v2alpha1/... ./pkg/controller.v2/... ./pkg/runtime.v2/... ./pkg/webhooks.v2/... ./pkg/util.v2/... -coverprofile cover.out -envtest: -ifndef HAS_SETUP_ENVTEST - go install sigs.k8s.io/controller-runtime/tools/setup-envtest@release-0.19 - @echo "setup-envtest has been installed" -endif - @echo "setup-envtest has already installed" - -build: generate fmt vet ## Build manager binary. - go build -o bin/manager cmd/training-operator.v1/main.go - -run: manifests generate fmt vet ## Run a controller from your host. - go run ./cmd/training-operator.v1/main.go - -docker-build: test ## Build docker image with the manager. - docker build -t ${IMG} -f build/images/training-operator/Dockerfile . - -docker-push: ## Push docker image with the manager. - docker push ${IMG} - -##@ Deployment - -install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. 
- $(KUSTOMIZE) build manifests/base/crds | kubectl apply --server-side -f - - -uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. - $(KUSTOMIZE) build manifests/base/crds | kubectl delete -f - - -deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. - cd manifests/overlays/standalone && $(KUSTOMIZE) edit set image kubeflow/training-operator=${IMG} - $(KUSTOMIZE) build manifests/overlays/standalone | kubectl apply -f - - -undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. - $(KUSTOMIZE) build manifests/overlays/standalone | kubectl delete -f - - -PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST)))) - -.PHONY: go-mod-download -go-mod-download: - go mod download - -CONTROLLER_GEN = $(shell pwd)/bin/controller-gen -controller-gen: ## Download controller-gen locally if necessary. - GOBIN=$(PROJECT_DIR)/bin go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.16.5 - -KUSTOMIZE = $(shell pwd)/bin/kustomize -kustomize: ## Download kustomize locally if necessary. - GOBIN=$(PROJECT_DIR)/bin go install sigs.k8s.io/kustomize/kustomize/v5@v5.4.3 - -## Download external CRDs for the integration testings. -EXTERNAL_CRDS_DIR ?= $(PROJECT_DIR)/manifests/external-crds - -JOBSET_ROOT = $(shell go list -m -mod=readonly -f "{{.Dir}}" sigs.k8s.io/jobset) -.PHONY: jobset-operator-crd -jobset-operator-crd: ## Copy the CRDs from the jobset-operator to the manifests/external-crds directory. 
- mkdir -p $(EXTERNAL_CRDS_DIR)/jobset-operator/ - cp -f $(JOBSET_ROOT)/config/components/crd/bases/* $(EXTERNAL_CRDS_DIR)/jobset-operator/ - -SCHEDULER_PLUGINS_ROOT = $(shell go list -m -f "{{.Dir}}" sigs.k8s.io/scheduler-plugins) -.PHONY: scheduler-plugins-crd -scheduler-plugins-crd: - mkdir -p $(EXTERNAL_CRDS_DIR)/scheduler-plugins/ - cp -f $(SCHEDULER_PLUGINS_ROOT)/manifests/coscheduling/* $(EXTERNAL_CRDS_DIR)/scheduler-plugins +.PHONY: test-integration +test-integration: envtest jobset-operator-crd scheduler-plugins-crd ## Run Go integration test. + KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test ./test/... -coverprofile cover.out diff --git a/README.md b/README.md index 681999e5f5..54b2a91615 100644 --- a/README.md +++ b/README.md @@ -1,112 +1,62 @@ -# Kubeflow Training Operator +# Kubeflow Trainer [![Build Status](https://github.com/kubeflow/training-operator/actions/workflows/test-go.yaml/badge.svg?branch=master)](https://github.com/kubeflow/training-operator/actions/workflows/test-go.yaml?branch=master) [![Coverage Status](https://coveralls.io/repos/github/kubeflow/training-operator/badge.svg?branch=master)](https://coveralls.io/github/kubeflow/training-operator?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/kubeflow/training-operator)](https://goreportcard.com/report/github.com/kubeflow/training-operator) -## Overview - -Kubeflow Training Operator is a Kubernetes-native project for fine-tuning and -scalable distributed training of machine learning (ML) models created with various ML frameworks -such as PyTorch, TensorFlow, HuggingFace, [JAX](https://jax.readthedocs.io/en/latest/), DeepSpeed, XGBoost, PaddlePaddle and others. - -You can run high-performance computing (HPC) tasks with the Training Operator and `MPIJob` since it -supports running Message Passing Interface (MPI) on Kubernetes which is heavily used for HPC. -The Training Operator implements the V1 API version of MPI Operator. 
For the MPI Operator V2 version, -please follow [this guide](https://www.kubeflow.org/docs/components/training/user-guides/mpi/) to -install MPI Operator V2. - -The Training Operator allows you to use Kubernetes workloads to effectively train your large models -via [Kubernetes Custom Resources APIs](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) -or using the Training Operator Python SDK. - -## Prerequisites - -Please check [the official Kubeflow documentation](https://www.kubeflow.org/docs/components/training/installation/#prerequisites) -for prerequisites to install the Training Operator. - -## Installation - -Please follow [the Kubeflow Training Operator guide](https://www.kubeflow.org/docs/components/training/installation/#installing-the-training-operator) -for the detailed instructions on how to install Training Operator. - -### Installing the Control Plane +TODO (andreyvelich): Add logo -Run the following command to install the latest stable release of the Training Operator control plane: `v1.8.0`. +TODO (andreyvelich): Add diagram -```bash -kubectl apply --server-side -k "github.com/kubeflow/training-operator.git/manifests/overlays/standalone?ref=v1.8.0" -``` +## Overview -Run the following command to install the latest changes of the Training Operator control plane: +Kubeflow Trainer is a Kubernetes-native project designed for large language model (LLM) +fine-tuning and for scalable, distributed training of machine learning (ML) models across +various frameworks, including PyTorch, JAX, TensorFlow, and others. -```bash -kubectl apply --server-side -k "github.com/kubeflow/training-operator/manifests/overlays/standalone" -``` +You can integrate other ML libraries such as [HuggingFace](https://huggingface.co), +[DeepSpeed](https://github.com/microsoft/DeepSpeed), or [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) +with Kubeflow Trainer to orchestrate their ML training on Kubernetes.
-### Installing the Python SDK +Kubeflow Trainer allows you to effortlessly develop your LLMs with the Kubeflow Python SDK and +build Kubernetes-native Training Runtimes with Kubernetes Custom Resources APIs. -The Training Operator [implements a Python SDK](https://pypi.org/project/kubeflow-training/) -to simplify creation of distributed training and fine-tuning jobs for Data Scientists. +## Kubeflow Trainer Introduction -Run the following command to install the latest stable release of the Training SDK: +The following KubeCon + CloudNativeCon 2024 talk provides an overview of Kubeflow Trainer capabilities: -``` -pip install -U kubeflow-training -``` +[![Kubeflow Trainer](https://img.youtube.com/vi/Lgy4ir1AhYw/0.jpg)](https://www.youtube.com/watch?v=Lgy4ir1AhYw) ## Getting Started -Please refer to [the getting started guide](https://www.kubeflow.org/docs/components/training/getting-started/#getting-started-with-pytorchjob) -to quickly create your first distributed training job using the Python SDK. - -If you want to work directly with Kubernetes Custom Resources provided by Training Operator, -follow [the PyTorchJob MNIST guide](https://www.kubeflow.org/docs/components/training/pytorch/#creating-a-pytorch-training-job). +Please check [the official Kubeflow documentation](https://www.kubeflow.org/docs/components/training/getting-started) +to install and get started with Kubeflow Trainer. ## Community The following links provide information on how to get involved in the community: -- Attend [the bi-weekly AutoML and Training Working Group](https://bit.ly/2PWVCkV) community meeting. - Join our [`#kubeflow-training` Slack channel](https://www.kubeflow.org/docs/about/community/#kubeflow-slack). +- Attend [the bi-weekly AutoML and Training Working Group](https://bit.ly/2PWVCkV) community meeting. - Check out [who is using the Training Operator](ADOPTERS.md).
-This is a part of Kubeflow, so please see [readme in kubeflow/kubeflow](https://github.com/kubeflow/kubeflow#get-involved) to get in touch with the community. - ## Contributing Please refer to the [CONTRIBUTING guide](CONTRIBUTING.md). -## Change Log +## Changelog Please refer to the [CHANGELOG](CHANGELOG.md). -## Version Matrix - -The following table lists the most recent few versions of the operator. - -| Operator Version | API Version | Kubernetes Version | -| ---------------------- | ----------- | ------------------ | -| `v1.4.x` | `v1` | 1.23+ | -| `v1.5.x` | `v1` | 1.23+ | -| `v1.6.x` | `v1` | 1.23+ | -| `v1.7.x` | `v1` | 1.25+ | -| `v1.8.x` | `v1` | 1.27+ | -| `latest` (master HEAD) | `v1` | 1.27+ | - -## Reference +## Kubeflow Training Operator V1 -For a complete reference of the custom resource definitions, please refer to the API Definition. +The Kubeflow Trainer project is currently in alpha status, and its APIs may change. +If you are using Kubeflow Training Operator V1, please refer to [this migration document](https://www.kubeflow.org/docs/components/training/operator-guides/migration). -- [TensorFlow API Definition](pkg/apis/kubeflow.org/v1/tensorflow_types.go) -- [PyTorch API Definition](pkg/apis/kubeflow.org/v1/pytorch_types.go) -- [XGBoost API Definition](pkg/apis/kubeflow.org/v1/xgboost_types.go) -- [MPI API Definition](pkg/apis/kubeflow.org/v1/mpi_types.go) -- [PaddlePaddle API Definition](pkg/apis/kubeflow.org/v1/paddlepaddle_types.go) -- [JAX API Definition](pkg/apis/kubeflow.org/v1/jax_types.go) +The Kubeflow community will maintain the Training Operator V1 source code at +[the `release-1.9` branch](https://github.com/kubeflow/training-operator/tree/release-1.9). -For details on the Training Operator custom resources APIs, refer to -[the following API documentation](docs/api/kubeflow.org_v1_generated.asciidoc) +You can find the documentation for Kubeflow Training V1 in [these guides](https://www.kubeflow.org/docs/components/training/legacy-v1).
## Acknowledgement diff --git a/generate b/generate new file mode 100644 index 0000000000..9daeafb986 --- /dev/null +++ b/generate @@ -0,0 +1 @@ +test diff --git a/go.mod b/go.mod index b0717b2bf9..0b9ac1d313 100644 --- a/go.mod +++ b/go.mod @@ -8,9 +8,6 @@ require ( github.com/onsi/ginkgo/v2 v2.20.1 github.com/onsi/gomega v1.35.1 github.com/open-policy-agent/cert-controller v0.12.0 - github.com/prometheus/client_golang v1.20.2 - github.com/sirupsen/logrus v1.9.3 - github.com/stretchr/testify v1.9.0 go.uber.org/zap v1.27.0 k8s.io/api v0.31.3 k8s.io/apimachinery v0.31.3 @@ -24,8 +21,6 @@ require ( sigs.k8s.io/kueue v0.6.3 sigs.k8s.io/scheduler-plugins v0.28.9 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 - sigs.k8s.io/yaml v1.4.0 - volcano.sh/apis v1.9.0 ) require ( @@ -58,7 +53,7 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_golang v1.20.2 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect @@ -85,4 +80,5 @@ require ( k8s.io/apiextensions-apiserver v0.31.2 // indirect k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/go.sum b/go.sum index f0bee8f0a9..2511b03550 100644 --- a/go.sum +++ b/go.sum @@ -94,13 +94,10 @@ github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0leargg github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/sirupsen/logrus 
v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= @@ -175,7 +172,6 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.31.3 h1:umzm5o8lFbdN/hIXbrK9oRpOproJO62CV1zqxXrLgk8= @@ -212,5 +208,3 @@ sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+s sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= -volcano.sh/apis v1.9.0 h1:e+9yEbQOi6HvgaayAxYULT6n+59mkYvmqjKhp9Z06sY= -volcano.sh/apis v1.9.0/go.mod h1:yXNfsZRzAOq6EUyPJYFrlMorh1XsYQGonGWyr4IiznM= diff --git 
a/hack/verify-codegen.sh b/hack/verify-codegen.sh index c8f3ae33b0..1d66e1c743 100755 --- a/hack/verify-codegen.sh +++ b/hack/verify-codegen.sh @@ -16,35 +16,7 @@ set -o errexit set -o nounset -set -o pipefail -CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}") -DIFFROOT="${CURRENT_DIR}" -TMP_DIFFROOT="$(mktemp -d -t "$(basename "$0").XXXXXX")" - -cleanup() { - rm -rf "${TMP_DIFFROOT}" -} -trap "cleanup" EXIT SIGINT - -cleanup - -mkdir -p "${TMP_DIFFROOT}" -cp -a "${DIFFROOT}"/* "${TMP_DIFFROOT}" - -echo $TMP_DIFFROOT -echo $DIFFROOT - -# Generate files. make generate -echo "diffing ${DIFFROOT} against freshly generated codegen" -ret=0 -diff -Naupr -x.gitignore "${DIFFROOT}" "${TMP_DIFFROOT}" || ret=$? - -if [[ $ret -eq 0 ]]; then - echo "${DIFFROOT} up to date." -else - echo "${DIFFROOT} is out of date. Please run make generate" - exit 1 -fi +git diff --exit-code || echo -e "\n\nPlease run make generate to update files" && exit 1 From bf1c532cc4c5b02f2042c5241be6c204bc60715b Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Thu, 16 Jan 2025 20:45:31 +0000 Subject: [PATCH 03/22] Remove v1 GitHub workflows Signed-off-by: Andrey Velichkevich --- .../workflows/build-and-publish-images.yaml | 75 ----------------- .github/workflows/e2e-test-train-api.yaml | 61 -------------- .github/workflows/pre-commit.yaml | 8 +- .../workflows/publish-conformance-images.yaml | 24 ------ .github/workflows/publish-core-images.yaml | 54 ------------- .github/workflows/publish-example-images.yaml | 80 ------------------- .github/workflows/publish-images.yaml | 70 ++++++++++++++++ .../template-publish-image/action.yaml | 2 +- .github/workflows/test-e2e.yaml | 29 +++++++ .github/workflows/test-example-notebooks.yaml | 39 --------- .github/workflows/test-go.yaml | 70 +++++++++++++--- .github/workflows/test-python.yaml | 5 +- .github/workflows/unittests.yaml | 56 ------------- Makefile | 1 + hack/verify-codegen.sh | 22 ----- 15 files changed, 165 insertions(+), 431 deletions(-) delete mode 
100644 .github/workflows/build-and-publish-images.yaml delete mode 100644 .github/workflows/e2e-test-train-api.yaml delete mode 100644 .github/workflows/publish-conformance-images.yaml delete mode 100644 .github/workflows/publish-core-images.yaml delete mode 100644 .github/workflows/publish-example-images.yaml create mode 100644 .github/workflows/publish-images.yaml create mode 100644 .github/workflows/test-e2e.yaml delete mode 100644 .github/workflows/test-example-notebooks.yaml delete mode 100644 .github/workflows/unittests.yaml delete mode 100755 hack/verify-codegen.sh diff --git a/.github/workflows/build-and-publish-images.yaml b/.github/workflows/build-and-publish-images.yaml deleted file mode 100644 index 237300192e..0000000000 --- a/.github/workflows/build-and-publish-images.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Reusable workflows for publishing Training Operator images. -name: Build And Publish Images - -on: - workflow_call: - inputs: - component-name: - required: true - type: string - platforms: - required: true - type: string - dockerfile: - required: true - type: string - context: - required: false - type: string - default: . - tag-prefix: - required: false - type: string - default: v1 - secrets: - DOCKERHUB_USERNAME: - required: false - DOCKERHUB_TOKEN: - required: false - -jobs: - build-and-publish: - name: Publish Image - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Free-Up Disk Space - uses: ./.github/workflows/free-up-disk-space - - - name: Docker Login - # Trigger workflow only for kubeflow/training-operator repository with specific branch (master, v.*-branch, release-*) or tag (v.*). 
- if: >- - github.repository == 'kubeflow/training-operator' && - (github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/v') && endsWith(github.ref, '-branch')) || startsWith(github.ref, 'refs/heads/release-') || startsWith(github.ref, 'refs/tags/v')) - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Publish Component ${{ inputs.component-name }} - # Trigger workflow only for kubeflow/training-operator repository with specific branch (master, v.*-branch, release-*) or tag (v.*). - if: >- - github.repository == 'kubeflow/training-operator' && - (github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/v') && endsWith(github.ref, '-branch')) || startsWith(github.ref, 'refs/heads/release-') || startsWith(github.ref, 'refs/tags/v')) - id: publish - uses: ./.github/workflows/template-publish-image - with: - image: docker.io/kubeflow/${{ inputs.component-name }} - dockerfile: ${{ inputs.dockerfile }} - platforms: ${{ inputs.platforms }} - context: ${{ inputs.context }} - push: true - tag-prefix: ${{ inputs.tag-prefix }} - - - name: Test Build For Component ${{ inputs.component-name }} - if: steps.publish.outcome == 'skipped' - uses: ./.github/workflows/template-publish-image - with: - image: docker.io/kubeflow/${{ inputs.component-name }} - dockerfile: ${{ inputs.dockerfile }} - platforms: ${{ inputs.platforms }} - context: ${{ inputs.context }} - push: false - tag-prefix: ${{ inputs.tag-prefix }} diff --git a/.github/workflows/e2e-test-train-api.yaml b/.github/workflows/e2e-test-train-api.yaml deleted file mode 100644 index 045c3b19e2..0000000000 --- a/.github/workflows/e2e-test-train-api.yaml +++ /dev/null @@ -1,61 +0,0 @@ -name: E2E Test with train API -on: - - pull_request - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - e2e-test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - 
matrix: - kubernetes-version: ["v1.31.4"] - python-version: ["3.9", "3.10", "3.11"] - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup E2E Tests - uses: ./.github/workflows/setup-e2e-test - with: - kubernetes-version: ${{ matrix.kubernetes-version }} - python-version: ${{ matrix.python-version }} - - - name: Build trainer - run: | - ./scripts/gha/build-trainer.sh - env: - TRAINER_CI_IMAGE: kubeflowtraining/trainer:test - - - name: Load trainer - run: | - kind load docker-image ${{ env.TRAINER_CI_IMAGE }} --name ${{ env.KIND_CLUSTER }} - env: - KIND_CLUSTER: training-operator-cluster - TRAINER_CI_IMAGE: kubeflowtraining/trainer:test - - - name: Build storage initializer - run: | - ./scripts/gha/build-storage-initializer.sh - env: - STORAGE_INITIALIZER_CI_IMAGE: kubeflowtraining/storage-initializer:test - TRAINER_CI_IMAGE: kubeflowtraining/trainer:test - - - name: Load storage initializer - run: | - kind load docker-image ${{ env.STORAGE_INITIALIZER_CI_IMAGE }} --name ${{ env.KIND_CLUSTER }} - env: - KIND_CLUSTER: training-operator-cluster - STORAGE_INITIALIZER_CI_IMAGE: kubeflowtraining/storage-initializer:test - - - name: Run tests - run: | - pip install pytest - python3 -m pip install -e sdk/python[huggingface] - pytest -s sdk/python/test/e2e-fine-tune-llm/test_e2e_pytorch_fine_tune_llm.py --log-cli-level=debug - env: - STORAGE_INITIALIZER_IMAGE: kubeflowtraining/storage-initializer:test - TRAINER_TRANSFORMER_IMAGE: kubeflowtraining/trainer:test diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index 2b11178bf9..b1399192bb 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -3,12 +3,12 @@ name: pre-commit on: pull_request: push: - branches: [main] + branches: [master] jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - - uses: pre-commit/action@v3.0.1 + - uses: actions/checkout@v3 + - uses: 
actions/setup-python@v3 + - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/publish-conformance-images.yaml b/.github/workflows/publish-conformance-images.yaml deleted file mode 100644 index c1a18d1e28..0000000000 --- a/.github/workflows/publish-conformance-images.yaml +++ /dev/null @@ -1,24 +0,0 @@ -name: Publish Training Operator Conformance Test Images - -on: - - push - - pull_request - -jobs: - conformance-test: - name: Publish Image - uses: ./.github/workflows/build-and-publish-images.yaml - with: - component-name: ${{ matrix.component-name }} - platforms: linux/amd64,linux/arm64,linux/ppc64le - dockerfile: ${{ matrix.dockerfile }} - secrets: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} - - strategy: - fail-fast: false - matrix: - include: - - component-name: training-operator-conformance - dockerfile: sdk/python/Dockerfile.conformance diff --git a/.github/workflows/publish-core-images.yaml b/.github/workflows/publish-core-images.yaml deleted file mode 100644 index 5835a38126..0000000000 --- a/.github/workflows/publish-core-images.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: Publish Training Operator Core Images - -on: - - push - - pull_request - -jobs: - core: - name: Publish Image - uses: ./.github/workflows/build-and-publish-images.yaml - with: - component-name: ${{ matrix.component-name }} - platforms: ${{ matrix.platforms }} - dockerfile: ${{ matrix.dockerfile }} - context: ${{ matrix.context }} - tag-prefix: ${{ matrix.tag-prefix }} - secrets: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} - - strategy: - fail-fast: false - matrix: - include: - - component-name: training-operator - dockerfile: build/images/training-operator/Dockerfile - platforms: linux/amd64,linux/arm64,linux/ppc64le - tag-prefix: v1 - - component-name: training-operator-v2 - dockerfile: cmd/training-operator.v2alpha1/Dockerfile - platforms: 
linux/amd64,linux/arm64,linux/ppc64le - tag-prefix: v2alpha1 - - component-name: model-initializer-v2 - dockerfile: cmd/initializer_v2/model/Dockerfile - platforms: linux/amd64,linux/arm64 - tag-prefix: v2 - - component-name: dataset-initializer-v2 - dockerfile: cmd/initializer_v2/dataset/Dockerfile - platforms: linux/amd64,linux/arm64 - tag-prefix: v2 - - component-name: kubectl-delivery - dockerfile: build/images/kubectl-delivery/Dockerfile - platforms: linux/amd64,linux/arm64,linux/ppc64le - tag-prefix: v1 - - component-name: storage-initializer - dockerfile: sdk/python/kubeflow/storage_initializer/Dockerfile - context: sdk/python/kubeflow/storage_initializer - platforms: linux/amd64,linux/arm64 - tag-prefix: v1 - - component-name: trainer-huggingface - dockerfile: sdk/python/kubeflow/trainer/Dockerfile - context: sdk/python/kubeflow/trainer - platforms: linux/amd64,linux/arm64 - tag-prefix: v1 diff --git a/.github/workflows/publish-example-images.yaml b/.github/workflows/publish-example-images.yaml deleted file mode 100644 index 5012714b57..0000000000 --- a/.github/workflows/publish-example-images.yaml +++ /dev/null @@ -1,80 +0,0 @@ -name: Publish Training Operator Example Images - -on: - - push - - pull_request - -jobs: - example: - name: Publish Image - uses: ./.github/workflows/build-and-publish-images.yaml - with: - component-name: ${{ matrix.component-name }} - platforms: ${{ matrix.platforms }} - dockerfile: ${{ matrix.dockerfile }} - context: ${{ matrix.context }} - secrets: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} - - strategy: - fail-fast: false - matrix: - include: - - component-name: tf-dist-mnist-test - platforms: linux/amd64,linux/arm64 - dockerfile: examples/tensorflow/dist-mnist/Dockerfile - - component-name: tf-multi-worker-strategy - platforms: linux/amd64,linux/arm64 - dockerfile: examples/tensorflow/distribution_strategy/Dockerfile - - component-name: tf-mnist-with-summaries - 
platforms: linux/amd64,linux/arm64 - dockerfile: examples/tensorflow/mnist_with_summaries/Dockerfile - - component-name: tf-smoke - platforms: linux/amd64,linux/arm64 - dockerfile: examples/tensorflow/tf_sample/Dockerfile - - component-name: pytorch-dist-sendrecv-test - platforms: linux/amd64,linux/arm64 - dockerfile: examples/pytorch/smoke-dist/Dockerfile - - component-name: pytorch-elastic-example-imagenet - platforms: linux/amd64,linux/arm64 - dockerfile: examples/pytorch/elastic/imagenet/Dockerfile - - component-name: pytorch-elastic-example-echo - platforms: linux/amd64,linux/arm64 - dockerfile: examples/pytorch/elastic/echo/Dockerfile - - component-name: pytorch-torchrun-cpu - platforms: linux/amd64,linux/arm64 - dockerfile: examples/pytorch/cpu-demo/Dockerfile - context: examples/pytorch/cpu-demo - - component-name: xgboost-dist-iris - platforms: linux/amd64,linux/arm64 - dockerfile: examples/xgboost/xgboost-dist/Dockerfile - context: examples/xgboost/xgboost-dist - - component-name: lightgbm-dist-py-test - platforms: linux/amd64,linux/arm64 - dockerfile: examples/xgboost/lightgbm-dist/Dockerfile - context: examples/xgboost/lightgbm-dist - - component-name: xgboost-dist-rabit-test - platforms: linux/amd64,linux/arm64 - dockerfile: examples/xgboost/smoke-dist/Dockerfile - context: examples/xgboost/smoke-dist - - component-name: pytorch-dist-mnist - platforms: linux/amd64,linux/arm64 - dockerfile: examples/pytorch/mnist/Dockerfile - context: examples/pytorch/mnist - - component-name: pytorch-dist-mnist-mpi - platforms: linux/amd64,linux/arm64 - dockerfile: examples/pytorch/mnist/Dockerfile-mpi - context: examples/pytorch/mnist - - component-name: jaxjob-simple - platforms: linux/amd64,linux/arm64 - dockerfile: examples/jax/cpu-demo/Dockerfile - context: examples/jax/cpu-demo - - component-name: pytorch-deepspeed-demo - platforms: linux/amd64 - dockerfile: examples/pytorch/deepspeed-demo/Dockerfile - context: examples/pytorch/deepspeed-demo - - component-name: 
jaxjob-dist-spmd-mnist - platforms: linux/amd64,linux/arm64 - dockerfile: examples/jax/jax-dist-spmd-mnist/Dockerfile - context: examples/jax/jax-dist-spmd-mnist/ diff --git a/.github/workflows/publish-images.yaml b/.github/workflows/publish-images.yaml new file mode 100644 index 0000000000..3aa0f5048c --- /dev/null +++ b/.github/workflows/publish-images.yaml @@ -0,0 +1,70 @@ +name: Publish Kubeflow Trainer Images + +on: + - push + - pull_request + +jobs: + build-and-publish: + name: Build and Publish Images + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + include: + - component-name: training-operator-v2 + dockerfile: cmd/training-operator.v2alpha1/Dockerfile + platforms: linux/amd64,linux/arm64,linux/ppc64le + tag-prefix: v2alpha1 + - component-name: model-initializer-v2 + dockerfile: cmd/initializer_v2/model/Dockerfile + platforms: linux/amd64,linux/arm64 + tag-prefix: v2 + - component-name: dataset-initializer-v2 + dockerfile: cmd/initializer_v2/dataset/Dockerfile + platforms: linux/amd64,linux/arm64 + tag-prefix: v2 + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Free-Up Disk Space + uses: ./.github/workflows/free-up-disk-space + + - name: Docker Login + # Trigger workflow only for kubeflow/training-operator repository with specific branch (master, release-*) or tag (v.*). + if: >- + github.repository == 'kubeflow/training-operator' && + (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release-') || startsWith(github.ref, 'refs/tags/v')) + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Publish Component ${{ matrix.component-name }} + # Trigger workflow only for kubeflow/training-operator repository with specific branch (master, release-*) or tag (v.*). 
+ if: >- + github.repository == 'kubeflow/training-operator' && + (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release-') || startsWith(github.ref, 'refs/tags/v')) + id: publish + uses: ./.github/workflows/template-publish-image + with: + image: docker.io/kubeflow/${{ matrix.component-name }} + dockerfile: ${{ matrix.dockerfile }} + platforms: ${{ matrix.platforms }} + context: ${{ matrix.context }} + tag-prefix: ${{ matrix.tag-prefix }} + push: true + + - name: Test Build For Component ${{ matrix.component-name }} + if: steps.publish.outcome == 'skipped' + uses: ./.github/workflows/template-publish-image + with: + image: docker.io/kubeflow/${{ matrix.component-name }} + dockerfile: ${{ matrix.dockerfile }} + platforms: ${{ matrix.platforms }} + context: ${{ matrix.context }} + tag-prefix: ${{ matrix.tag-prefix }} + push: false diff --git a/.github/workflows/template-publish-image/action.yaml b/.github/workflows/template-publish-image/action.yaml index 0987f134ab..324364b01c 100644 --- a/.github/workflows/template-publish-image/action.yaml +++ b/.github/workflows/template-publish-image/action.yaml @@ -22,7 +22,7 @@ inputs: tag-prefix: required: false default: v1 - description: Prefix for the image tag, e.g. v1 or v2alpha1 + description: Prefix for the image tag, e.g. 
v2alpha1 runs: using: composite diff --git a/.github/workflows/test-e2e.yaml b/.github/workflows/test-e2e.yaml new file mode 100644 index 0000000000..b9e0583973 --- /dev/null +++ b/.github/workflows/test-e2e.yaml @@ -0,0 +1,29 @@ +name: E2E Test + +on: + - push + - pull_request + +jobs: + e2e-test: + name: E2E Test + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11"] + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + echo "TODO (andreyvelich): Implement E2E Tests" + # pip install -U './sdk_v2' diff --git a/.github/workflows/test-example-notebooks.yaml b/.github/workflows/test-example-notebooks.yaml deleted file mode 100644 index 0ee767e165..0000000000 --- a/.github/workflows/test-example-notebooks.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: Test example notebooks - -on: - - pull_request - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - create-pytorchjob-notebook-test: - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - fail-fast: false - matrix: - kubernetes-version: ["v1.28.7", "v1.29.2", "v1.30.6"] - python-version: ["3.9", "3.10", "3.11"] - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup E2E Tests - uses: ./.github/workflows/setup-e2e-test - with: - kubernetes-version: ${{ matrix.kubernetes-version }} - python-version: ${{ matrix.python-version }} - - - name: Install Python Dependencies - run: | - pip install papermill==2.6.0 jupyter==1.1.1 ipykernel==6.29.5 - - - name: Run Jupyter Notebook with Papermill - shell: bash - run: | - ./scripts/run-notebook.sh \ - -i ./examples/pytorch/image-classification/create-pytorchjob.ipynb \ - -n default \ - -k ./sdk/python diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index b314f73724..776aa195f3 
100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -1,12 +1,12 @@ -name: Go generate Test +name: Unit and Integration Test - Go on: - push - pull_request jobs: - test: - name: Test + generate: + name: Generate runs-on: ubuntu-latest env: GOPATH: ${{ github.workspace }}/go @@ -15,7 +15,7 @@ jobs: working-directory: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator steps: - - name: Check out code + - name: Checkout code uses: actions/checkout@v4 with: path: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator @@ -27,20 +27,68 @@ jobs: - name: Check Go modules run: | - go mod tidy && pushd hack/swagger && go mod tidy && popd && git add go.* && + go mod tidy && git diff --cached --exit-code || (echo 'Please run "go mod tidy" to sync Go modules' && exit 1); - name: Check auto-generated assets run: | - make generate && git add pkg sdk manifests && + make generate && git diff --cached --exit-code || (echo 'Please run "make generate" to generate assets' && exit 1); - - name: Verify gofmt + - name: Check go fmt run: | - make fmt && git add pkg cmd && + make fmt && git diff --cached --exit-code || (echo 'Please run "make fmt" to verify gofmt' && exit 1); - - name: Verify govet + - name: Check go fmt run: | - make vet && git add pkg cmd && + make vet git diff --cached --exit-code || (echo 'Please run "make vet" to verify govet' && exit 1); - - name: Verify golint + - name: Check golangci lint run: | make golangci-lint + test: + name: Test + runs-on: ubuntu-latest + env: + GOPATH: ${{ github.workspace }}/go + defaults: + run: + working-directory: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator + + strategy: + fail-fast: false + matrix: + # Kubernetes versions for setup-envtest integration tests. 
+ kubernetes-version: ["1.29.3", "1.30.0", "1.31.0"] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + path: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator/go.mod + + - name: Run Go unit tests + run: | + make test + + - name: Run Go integration tests + run: | + make test-integration ENVTEST_K8S_VERSION=${{ matrix.kubernetes-version }} + + - name: Coveralls report + uses: shogo82148/actions-goveralls@v1 + with: + path-to-profile: cover.out + working-directory: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator + parallel: true + + finish: + needs: test + runs-on: ubuntu-latest + steps: + - uses: shogo82148/actions-goveralls@v1 + with: + parallel-finished: true diff --git a/.github/workflows/test-python.yaml b/.github/workflows/test-python.yaml index 854995e065..961c1f67c2 100644 --- a/.github/workflows/test-python.yaml +++ b/.github/workflows/test-python.yaml @@ -1,4 +1,4 @@ -name: Python Test +name: Unit and Integration Test - Python on: - push @@ -12,9 +12,6 @@ jobs: strategy: fail-fast: false matrix: - # TODO (tenzen-y): Once we resolve this compatibility issue, we will test against all Python versions. 
- # REF: https://github.com/kubeflow/training-operator/issues/2096 - # python-version: ["3.8", "3.9", "3.10", "3.11"] python-version: ["3.10", "3.11"] steps: diff --git a/.github/workflows/unittests.yaml b/.github/workflows/unittests.yaml deleted file mode 100644 index 66a48de9e7..0000000000 --- a/.github/workflows/unittests.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: Go Test - -on: - - push - - pull_request - -jobs: - test: - name: Test - runs-on: ubuntu-latest - env: - GOPATH: ${{ github.workspace }}/go - defaults: - run: - working-directory: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator - - strategy: - fail-fast: false - matrix: - # Detail: `setup-envtest list` - kubernetes-version: ["1.28.3", "1.29.3", "1.30.0", "1.31.0"] - - steps: - - name: Check out code - uses: actions/checkout@v4 - with: - path: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator - - - name: Setup Go - uses: actions/setup-go@v5 - with: - go-version-file: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator/go.mod - - - name: Run Go test for v1 - run: | - make test ENVTEST_K8S_VERSION=${{ matrix.kubernetes-version }} - - - name: Run Go test for v2 - run: | - make testv2 - make test-integrationv2 ENVTEST_K8S_VERSION=${{ matrix.kubernetes-version }} - - - name: Coveralls report - uses: shogo82148/actions-goveralls@v1 - with: - path-to-profile: cover.out - working-directory: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator - parallel: true - - finish: - needs: test - runs-on: ubuntu-latest - steps: - - uses: shogo82148/actions-goveralls@v1 - with: - parallel-finished: true diff --git a/Makefile b/Makefile index 5e01696243..6dce85d1ed 100644 --- a/Makefile +++ b/Makefile @@ -103,4 +103,5 @@ test: ## Run Go unit test. .PHONY: test-integration test-integration: envtest jobset-operator-crd scheduler-plugins-crd ## Run Go integration test. 
+ echo "Run tests for Kubernetes $(ENVTEST_K8S_VERSION)" KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test ./test/... -coverprofile cover.out diff --git a/hack/verify-codegen.sh b/hack/verify-codegen.sh deleted file mode 100755 index 1d66e1c743..0000000000 --- a/hack/verify-codegen.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Copyright 2024 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -o errexit -set -o nounset - -make generate - -git diff --exit-code || echo -e "\n\nPlease run make generate to update files" && exit 1 From 41ca29e1efe164e2728218fe962cab38c19ddbc0 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Thu, 16 Jan 2025 20:52:48 +0000 Subject: [PATCH 04/22] Remove finish action from Go Signed-off-by: Andrey Velichkevich --- .github/workflows/test-e2e.yaml | 2 +- .github/workflows/test-go.yaml | 8 -------- Makefile | 1 - 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/test-e2e.yaml b/.github/workflows/test-e2e.yaml index b9e0583973..de329583ae 100644 --- a/.github/workflows/test-e2e.yaml +++ b/.github/workflows/test-e2e.yaml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11"] + kubernetes-version: ["1.29.3", "1.30.0", "1.31.0"] steps: - name: Check out code diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index 776aa195f3..b5b7db0dd8 100644 --- a/.github/workflows/test-go.yaml +++ 
b/.github/workflows/test-go.yaml @@ -84,11 +84,3 @@ jobs: path-to-profile: cover.out working-directory: ${{ env.GOPATH }}/src/github.com/kubeflow/training-operator parallel: true - - finish: - needs: test - runs-on: ubuntu-latest - steps: - - uses: shogo82148/actions-goveralls@v1 - with: - parallel-finished: true diff --git a/Makefile b/Makefile index 6dce85d1ed..5e01696243 100644 --- a/Makefile +++ b/Makefile @@ -103,5 +103,4 @@ test: ## Run Go unit test. .PHONY: test-integration test-integration: envtest jobset-operator-crd scheduler-plugins-crd ## Run Go integration test. - echo "Run tests for Kubernetes $(ENVTEST_K8S_VERSION)" KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test ./test/... -coverprofile cover.out From 4128f60a2e3d902c022fa883288767eeef78e84b Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Fri, 17 Jan 2025 23:55:21 +0000 Subject: [PATCH 05/22] Rename Kubeflow Training to Kubeflow Trainer Signed-off-by: Andrey Velichkevich --- .github/ISSUE_TEMPLATE/bug_report.yaml | 4 +- .github/ISSUE_TEMPLATE/config.yml | 10 ++-- .github/ISSUE_TEMPLATE/feature_request.yaml | 8 +-- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/issue_label_bot.yaml | 5 -- ...images.yaml => build-and-push-images.yaml} | 2 +- .../{stale.yaml => github-stale.yaml} | 0 ...st.yaml => github-trigger-rerun-test.yaml} | 0 .github/workflows/pre-commit.yaml | 14 ----- .github/workflows/setup-e2e-test/action.yaml | 57 ------------------- .../action.yaml | 54 +++++++++++++++++- .../template-publish-image/action.yaml | 2 +- .github/workflows/test-python.yaml | 7 +++ ADOPTERS.md | 6 +- README.md | 8 +-- docs/README.md | 6 +- sdk_v2/pyproject.toml | 2 +- 17 files changed, 85 insertions(+), 102 deletions(-) delete mode 100644 .github/issue_label_bot.yaml rename .github/workflows/{publish-images.yaml => build-and-push-images.yaml} (98%) rename .github/workflows/{stale.yaml => github-stale.yaml} (100%) rename .github/workflows/{trigger-rerun-test.yaml => 
github-trigger-rerun-test.yaml} (100%) delete mode 100644 .github/workflows/pre-commit.yaml delete mode 100644 .github/workflows/setup-e2e-test/action.yaml rename .github/workflows/{free-up-disk-space => template-e2e-test}/action.yaml (50%) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml index 9ba575835a..1033f1f14e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -1,11 +1,11 @@ name: Bug Report -description: Tell us about a problem you are experiencing with Training Operator +description: Tell us about a problem you are experiencing with Kubeflow Trainer labels: ["kind/bug", "lifecycle/needs-triage"] body: - type: markdown attributes: value: | - Thanks for taking the time to fill out this Training Operator bug report! + Thanks for taking the time to fill out this Kubeflow Trainer bug report! - type: textarea id: problem attributes: diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 4ef63bee65..4d41e64768 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,12 +1,12 @@ blank_issues_enabled: true contact_links: - - name: Training Operator Documentation - url: https://www.kubeflow.org/docs/components/training/ + - name: Kubeflow Trainer Documentation + url: https://www.kubeflow.org/docs/components/trainer/ about: Much help can be found in the docs - - name: Kubeflow Training Operator Slack Channel + - name: Kubeflow Trainer Slack Channel url: https://www.kubeflow.org/docs/about/community/#kubeflow-slack-channels - about: Ask the Training Operator community on CNCF Slack - - name: Kubeflow Training Operator Community Meeting + about: Ask the Kubeflow Trainer community on CNCF Slack + - name: Kubeflow Training and AutoML WG Community Meeting url: https://bit.ly/2PWVCkV about: Join the Kubeflow Training working group meeting diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml 
b/.github/ISSUE_TEMPLATE/feature_request.yaml index 860b6cf124..64a6d99afa 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yaml +++ b/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -1,18 +1,18 @@ name: Feature Request -description: Suggest an idea for Training Operator +description: Suggest an idea for Kubeflow Trainer labels: ["kind/feature", "lifecycle/needs-triage"] body: - type: markdown attributes: value: | - Thanks for taking the time to fill out this Training Operator feature request! + Thanks for taking the time to fill out this Kubeflow Trainer feature request! - type: textarea id: feature attributes: label: What you would like to be added? description: | - A clear and concise description of what you want to add to Training Operator. - Please consider to write Training Operator enhancement proposal if it is a large feature request. + A clear and concise description of what you want to add to Kubeflow Trainer. + Please consider to write Kubeflow Enhancement Proposal (KEP) if it is a large feature request. 
validations: required: true - type: textarea diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 0327c913ab..593435460c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -12,4 +12,4 @@ Fixes # **Checklist:** -- [ ] [Docs](https://www.kubeflow.org/docs/components/training/) included if any changes are user facing +- [ ] [Docs](https://www.kubeflow.org/docs/components/trainer/) included if any changes are user facing diff --git a/.github/issue_label_bot.yaml b/.github/issue_label_bot.yaml deleted file mode 100644 index 0dcef9fc4e..0000000000 --- a/.github/issue_label_bot.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# for https://mlbot.net a Github bot that labels issues using KubeFlow -label-alias: - bug: 'kind/bug' - feature_request: 'kind/feature' - question: 'kind/question' diff --git a/.github/workflows/publish-images.yaml b/.github/workflows/build-and-push-images.yaml similarity index 98% rename from .github/workflows/publish-images.yaml rename to .github/workflows/build-and-push-images.yaml index 3aa0f5048c..49e14016cf 100644 --- a/.github/workflows/publish-images.yaml +++ b/.github/workflows/build-and-push-images.yaml @@ -1,4 +1,4 @@ -name: Publish Kubeflow Trainer Images +name: Build and Publish Images on: - push diff --git a/.github/workflows/stale.yaml b/.github/workflows/github-stale.yaml similarity index 100% rename from .github/workflows/stale.yaml rename to .github/workflows/github-stale.yaml diff --git a/.github/workflows/trigger-rerun-test.yaml b/.github/workflows/github-trigger-rerun-test.yaml similarity index 100% rename from .github/workflows/trigger-rerun-test.yaml rename to .github/workflows/github-trigger-rerun-test.yaml diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml deleted file mode 100644 index b1399192bb..0000000000 --- a/.github/workflows/pre-commit.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: pre-commit - -on: - pull_request: - push: - 
branches: [master] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/setup-e2e-test/action.yaml b/.github/workflows/setup-e2e-test/action.yaml deleted file mode 100644 index 900151ea07..0000000000 --- a/.github/workflows/setup-e2e-test/action.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: Setup E2E test template -description: A composite action to setup e2e tests - -inputs: - kubernetes-version: - required: true - description: Kubernetes version - python-version: - required: true - description: Python version - gang-scheduler-name: - required: false - default: "none" - description: Gang scheduler name - -runs: - using: composite - steps: - - name: Free-Up Disk Space - uses: ./.github/workflows/free-up-disk-space - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python-version }} - - - name: Setup Go - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - - - name: Create k8s Kind Cluster - uses: helm/kind-action@v1.11.0 - with: - node_image: kindest/node:${{ inputs.kubernetes-version }} - cluster_name: training-operator-cluster - kubectl_version: ${{ inputs.kubernetes-version }} - - - name: Build training-operator - shell: bash - run: | - ./scripts/gha/build-image.sh - env: - TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test - - - name: Deploy training operator - shell: bash - run: | - ./scripts/gha/setup-training-operator.sh - docker system prune -a -f - docker system df - df -h - env: - KIND_CLUSTER: training-operator-cluster - TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test - GANG_SCHEDULER_NAME: ${{ inputs.gang-scheduler-name }} - KUBERNETES_VERSION: ${{ inputs.kubernetes-version }} diff --git a/.github/workflows/free-up-disk-space/action.yaml b/.github/workflows/template-e2e-test/action.yaml similarity index 50% rename from 
.github/workflows/free-up-disk-space/action.yaml rename to .github/workflows/template-e2e-test/action.yaml index c85e44e8c5..82a12e2121 100644 --- a/.github/workflows/free-up-disk-space/action.yaml +++ b/.github/workflows/template-e2e-test/action.yaml @@ -1,5 +1,18 @@ -name: Free-Up Disk Space -description: Remove Non-Essential Tools And Move Docker Data Directory to /mnt/docker +# TODO (andreyvelich): Refactor this once we have e2e test for Kubeflow Trainer. +name: Setup E2E test template +description: A composite action to setup e2e tests + +inputs: + kubernetes-version: + required: true + description: Kubernetes version + python-version: + required: true + description: Python version + gang-scheduler-name: + required: false + default: "none" + description: Gang scheduler name runs: using: composite @@ -47,3 +60,40 @@ runs: sudo systemctl start docker echo "Docker service status:" sudo systemctl --no-pager -l -o short status docker + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Create k8s Kind Cluster + uses: helm/kind-action@v1.11.0 + with: + node_image: kindest/node:${{ inputs.kubernetes-version }} + cluster_name: training-operator-cluster + kubectl_version: ${{ inputs.kubernetes-version }} + + - name: Build training-operator + shell: bash + run: | + ./scripts/gha/build-image.sh + env: + TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test + + - name: Deploy training operator + shell: bash + run: | + ./scripts/gha/setup-training-operator.sh + docker system prune -a -f + docker system df + df -h + env: + KIND_CLUSTER: training-operator-cluster + TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test + GANG_SCHEDULER_NAME: ${{ inputs.gang-scheduler-name }} + KUBERNETES_VERSION: ${{ inputs.kubernetes-version }} diff --git a/.github/workflows/template-publish-image/action.yaml 
b/.github/workflows/template-publish-image/action.yaml index 324364b01c..14deecc41b 100644 --- a/.github/workflows/template-publish-image/action.yaml +++ b/.github/workflows/template-publish-image/action.yaml @@ -21,7 +21,7 @@ inputs: description: whether to push container images or not tag-prefix: required: false - default: v1 + default: v2alpha1 description: Prefix for the image tag, e.g. v2alpha1 runs: diff --git a/.github/workflows/test-python.yaml b/.github/workflows/test-python.yaml index 961c1f67c2..50df15ebf6 100644 --- a/.github/workflows/test-python.yaml +++ b/.github/workflows/test-python.yaml @@ -5,6 +5,13 @@ on: - pull_request jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + - uses: pre-commit/action@v3.0.1 + python-test: name: Test runs-on: ubuntu-latest diff --git a/ADOPTERS.md b/ADOPTERS.md index f2ec640ad7..abdde8ca9d 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -1,6 +1,8 @@ -# Adopters of Kubeflow Training Operator +# Adopters of Kubeflow Trainer -This page contains a list of organizations who are using Kubeflow Training Operator. If you'd like to be included here, please send a pull request which modifies this file. Please keep the list in alphabetical order. +This page contains a list of organizations who are using Kubeflow Trainer. +If you'd like to be included here, please send a pull request which modifies this file. +Please keep the list in alphabetical order. 
| Organization | Contact | | ------------ | ------- | diff --git a/README.md b/README.md index 54b2a91615..58c58b93a3 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ The following KubeCon + CloudNativeCon 2024 talk provides an overview of Kubeflo ## Getting Started -Please check [the official Kubeflow documentation](https://www.kubeflow.org/docs/components/training/getting-started) +Please check [the official Kubeflow documentation](https://www.kubeflow.org/docs/components/trainer/getting-started) to install and get started with Kubeflow Trainer. ## Community @@ -38,7 +38,7 @@ The following links provide information on how to get involved in the community: - Join our [`#kubeflow-training` Slack channel](https://www.kubeflow.org/docs/about/community/#kubeflow-slack). - Attend [the bi-weekly AutoML and Training Working Group](https://bit.ly/2PWVCkV) community meeting. -- Check out [who is using the Training Operator](ADOPTERS.md). +- Check out [who is using Kubeflow Trainer](ADOPTERS.md). ## Contributing @@ -51,12 +51,12 @@ Please refer to the [CHANGELOG](CHANGELOG.md). ## Kubeflow Training Operator V1 Kubeflow Trainer project is currently in alpha status, and APIs may change. -If you are using Kubeflow Training Operator V1, please refer [to this migration document](/docs/components/training/operator-guides/migration). +If you are using Kubeflow Training Operator V1, please refer [to this migration document](/docs/components/trainer/operator-guides/migration). Kubeflow Community will maintain the Training Operator V1 source code at [the `release-1.9` branch](https://github.com/kubeflow/training-operator/tree/release-1.9). -You can find the documentation for Kubeflow Training V1 in [these guides](https://www.kubeflow.org/docs/components/training/legacy-v1). +You can find the documentation for Kubeflow Training Operator V1 in [these guides](https://www.kubeflow.org/docs/components/trainer/legacy-v1). 
## Acknowledgement diff --git a/docs/README.md b/docs/README.md index 383701aec3..090f4c174d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,5 +1,5 @@ -# Training Operator Documentation +# Kubeflow Trainer Documentation -Welcome to Kubeflow Training Operator! +Welcome to Kubeflow Trainer! -The Training Operator documentation is available on [kubeflow.org](https://www.kubeflow.org/docs/components/training/). +The Kubeflow Trainer documentation is available on [kubeflow.org](https://www.kubeflow.org/docs/components/trainer/). diff --git a/sdk_v2/pyproject.toml b/sdk_v2/pyproject.toml index 52bcb98888..b17b23255f 100644 --- a/sdk_v2/pyproject.toml +++ b/sdk_v2/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ [project.urls] Homepage = "https://github.com/kubeflow/training-operator" -Documentation = "https://www.kubeflow.org/docs/components/training/" +Documentation = "https://www.kubeflow.org/docs/components/trainer/" Source = "https://github.com/kubeflow/training-operator" [tool.hatch.build.targets.wheel] From 48b4b4995ce4328c02c809c1d3088f8e11b5480e Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Fri, 17 Jan 2025 23:59:00 +0000 Subject: [PATCH 06/22] Rename comment Signed-off-by: Andrey Velichkevich --- .github/workflows/template-publish-image/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/template-publish-image/action.yaml b/.github/workflows/template-publish-image/action.yaml index 14deecc41b..a11b9d6db1 100644 --- a/.github/workflows/template-publish-image/action.yaml +++ b/.github/workflows/template-publish-image/action.yaml @@ -1,4 +1,4 @@ -# Composite action to publish Training Operator images. +# Composite action to publish Kubeflow Trainer images. 
name: Build And Publish Container Images description: Build Multiplatform Supporting Container Images From 80180ff0f3e9d38bf65668be8f7f27c09b2d8a39 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Sat, 18 Jan 2025 00:06:34 +0000 Subject: [PATCH 07/22] Add steps for free-up space Signed-off-by: Andrey Velichkevich --- .github/workflows/build-and-push-images.yaml | 3 -- .../template-publish-image/action.yaml | 44 +++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-and-push-images.yaml b/.github/workflows/build-and-push-images.yaml index 49e14016cf..c4fd1832f1 100644 --- a/.github/workflows/build-and-push-images.yaml +++ b/.github/workflows/build-and-push-images.yaml @@ -30,9 +30,6 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Free-Up Disk Space - uses: ./.github/workflows/free-up-disk-space - - name: Docker Login # Trigger workflow only for kubeflow/training-operator repository with specific branch (master, release-*) or tag (v.*). if: >- diff --git a/.github/workflows/template-publish-image/action.yaml b/.github/workflows/template-publish-image/action.yaml index a11b9d6db1..31613a4452 100644 --- a/.github/workflows/template-publish-image/action.yaml +++ b/.github/workflows/template-publish-image/action.yaml @@ -27,6 +27,50 @@ inputs: runs: using: composite steps: + # This step is a Workaround to avoid the "No space left on device" error. 
+ # ref: https://github.com/actions/runner-images/issues/2840 + - name: Remove unnecessary files + shell: bash + run: | + echo "Disk usage before cleanup:" + df -hT + + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/local/share/powershell + sudo rm -rf /usr/share/swift + + echo "Disk usage after cleanup:" + df -hT + + - name: Prune docker images + shell: bash + run: | + docker image prune -a -f + docker system df + df -hT + + - name: Move docker data directory + shell: bash + run: | + echo "Stopping docker service ..." + sudo systemctl stop docker + DOCKER_DEFAULT_ROOT_DIR=/var/lib/docker + DOCKER_ROOT_DIR=/mnt/docker + echo "Moving ${DOCKER_DEFAULT_ROOT_DIR} -> ${DOCKER_ROOT_DIR}" + sudo mv ${DOCKER_DEFAULT_ROOT_DIR} ${DOCKER_ROOT_DIR} + echo "Creating symlink ${DOCKER_DEFAULT_ROOT_DIR} -> ${DOCKER_ROOT_DIR}" + sudo ln -s ${DOCKER_ROOT_DIR} ${DOCKER_DEFAULT_ROOT_DIR} + echo "$(sudo ls -l ${DOCKER_DEFAULT_ROOT_DIR})" + echo "Starting docker service ..." 
+ sudo systemctl daemon-reload + sudo systemctl start docker + echo "Docker service status:" + sudo systemctl --no-pager -l -o short status docker + - name: Setup QEMU uses: docker/setup-qemu-action@v3 with: From dab4abcd7d63b925173d6ccb2f0868a9d3a59f5a Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Sat, 18 Jan 2025 19:26:14 +0000 Subject: [PATCH 08/22] Update Python tests Signed-off-by: Andrey Velichkevich --- .github/workflows/test-python.yaml | 30 ++++++++----------- Makefile | 25 ++++++++++++++-- test/__init__.py | 0 test/integration/__init__.py | 0 test/integration/initializer_v2/__init__.py | 0 test/integration/initializer_v2/conftest.py | 8 +++++ .../initializer_v2/dataset_test.py | 2 +- test/integration/initializer_v2/model_test.py | 2 +- test/integration/initializer_v2/utils.py | 9 ------ 9 files changed, 44 insertions(+), 32 deletions(-) delete mode 100644 test/__init__.py delete mode 100644 test/integration/__init__.py delete mode 100644 test/integration/initializer_v2/__init__.py delete mode 100644 test/integration/initializer_v2/utils.py diff --git a/.github/workflows/test-python.yaml b/.github/workflows/test-python.yaml index 50df15ebf6..7bb515a108 100644 --- a/.github/workflows/test-python.yaml +++ b/.github/workflows/test-python.yaml @@ -8,21 +8,24 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 - python-test: + test: name: Test runs-on: ubuntu-latest + env: + PYTHONPATH: ${{ github.workspace }}:${PYTHONPATH} strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11"] + # Python version to run unit and integration tests. 
+ python-version: ["3.11"] steps: - - name: Check out code + - name: Checkout code uses: actions/checkout@v4 - name: Setup Python @@ -30,19 +33,10 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Run Python unit tests run: | - pip install pytest python-dateutil urllib3 kubernetes - pip install -U './sdk/python[huggingface]' + make test-python - - name: Run unit test for training sdk + - name: Run Python integration tests. run: | - pytest ./sdk/python/kubeflow/training/api/training_client_test.py - - - name: Run Python unit tests for v2 - run: | - pip install -U './sdk_v2' - export PYTHONPATH="${{ github.workspace }}:$PYTHONPATH" - pytest ./pkg/initializer_v2/model - pytest ./pkg/initializer_v2/dataset - pytest ./pkg/initializer_v2/utils + make test-python-integration diff --git a/Makefile b/Makefile index 5e01696243..7ad56ba11f 100644 --- a/Makefile +++ b/Makefile @@ -31,14 +31,14 @@ help: ## Display this help. PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST)))) - -## Tool Binaries +# Tool Binaries LOCALBIN ?= $(PROJECT_DIR)/bin CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen ENVTEST ?= $(LOCALBIN)/setup-envtest ENVTEST_K8S_VERSION ?= 1.31 +# Instructions to download tools for development. .PHONY: envtest envtest: ## Download the setup-envtest binary if required. test -s $(ENVTEST) || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@release-0.19 @@ -47,7 +47,7 @@ envtest: ## Download the setup-envtest binary if required. controller-gen: ## Download the controller-gen binary if required. test -s $(CONTROLLER_GEN) || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.16.5 -# Download external CRDs for the integration testings. +# Download external CRDs for Go integration testings. 
EXTERNAL_CRDS_DIR ?= $(PROJECT_DIR)/manifests/external-crds JOBSET_ROOT = $(shell go list -m -mod=readonly -f "{{.Dir}}" sigs.k8s.io/jobset) @@ -62,6 +62,7 @@ scheduler-plugins-crd: ## Copy the CRDs from the Scheduler Plugins repository to mkdir -p $(EXTERNAL_CRDS_DIR)/scheduler-plugins/ cp -f $(SCHEDULER_PLUGINS_ROOT)/manifests/coscheduling/* $(EXTERNAL_CRDS_DIR)/scheduler-plugins +# Instructions for code generation. .PHONY: manifests manifests: controller-gen ## Generate manifests. $(CONTROLLER_GEN) "crd:generateEmbeddedObjectMeta=true" rbac:roleName=training-operator-v2 webhook \ @@ -80,6 +81,7 @@ generate: go-mod-download manifests ## Generate APIs and SDK. go-mod-download: ## Run go mod download to download modules. go mod download +# Instructions for code formatting. .PHONY: fmt fmt: ## Run go fmt against the code. go fmt ./... @@ -97,6 +99,7 @@ ifeq ($(GOLANGCI_LINT),) endif golangci-lint run --timeout 5m --go 1.23 ./... +# Instructions to run tests. .PHONY: test test: ## Run Go unit test. go test ./pkg/apis/kubeflow.org/v2alpha1/... ./pkg/controller.v2/... ./pkg/runtime.v2/... ./pkg/webhooks.v2/... ./pkg/util.v2/... -coverprofile cover.out @@ -104,3 +107,19 @@ test: ## Run Go unit test. .PHONY: test-integration test-integration: envtest jobset-operator-crd scheduler-plugins-crd ## Run Go integration test. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test ./test/... -coverprofile cover.out + +test-python: ## Run Python unit test. + export PYTHONPATH=$(PROJECT_DIR) + pip install pytest + pip install -r ./cmd/initializer_v2/dataset/requirements.txt + + pytest ./pkg/initializer_v2/dataset + pytest ./pkg/initializer_v2/model + pytest ./pkg/initializer_v2/utils + +test-python-integration: ## Run Python integration test. 
+ export PYTHONPATH=$(PROJECT_DIR) + pip install pytest + pip install -r ./cmd/initializer_v2/dataset/requirements.txt + + pytest ./test/integration/initializer_v2 diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/integration/__init__.py b/test/integration/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/integration/initializer_v2/__init__.py b/test/integration/initializer_v2/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/integration/initializer_v2/conftest.py b/test/integration/initializer_v2/conftest.py index 955b395e5f..af8c37d291 100644 --- a/test/integration/initializer_v2/conftest.py +++ b/test/integration/initializer_v2/conftest.py @@ -45,3 +45,11 @@ def configure_path(path_var: str): shutil.rmtree(temp_dir, ignore_errors=True) os.environ.clear() os.environ.update(original_env) + + +def verify_downloaded_files(dir_path, expected_files): + """Verify downloaded files""" + if expected_files: + actual_files = set(os.listdir(dir_path)) + missing_files = set(expected_files) - actual_files + assert not missing_files, f"Missing expected files: {missing_files}" diff --git a/test/integration/initializer_v2/dataset_test.py b/test/integration/initializer_v2/dataset_test.py index b1f18477b1..b470cb6549 100644 --- a/test/integration/initializer_v2/dataset_test.py +++ b/test/integration/initializer_v2/dataset_test.py @@ -1,8 +1,8 @@ import os import runpy -from test.integration.initializer_v2.utils import verify_downloaded_files import pytest +from conftest import verify_downloaded_files import pkg.initializer_v2.utils.utils as utils diff --git a/test/integration/initializer_v2/model_test.py b/test/integration/initializer_v2/model_test.py index ca9bf928ba..009445eb43 100644 --- a/test/integration/initializer_v2/model_test.py +++ b/test/integration/initializer_v2/model_test.py @@ -1,8 +1,8 @@ import os import runpy -from 
test.integration.initializer_v2.utils import verify_downloaded_files import pytest +from conftest import verify_downloaded_files import pkg.initializer_v2.utils.utils as utils diff --git a/test/integration/initializer_v2/utils.py b/test/integration/initializer_v2/utils.py deleted file mode 100644 index 6828b78dc2..0000000000 --- a/test/integration/initializer_v2/utils.py +++ /dev/null @@ -1,9 +0,0 @@ -import os - - -def verify_downloaded_files(dir_path, expected_files): - """Verify downloaded files""" - if expected_files: - actual_files = set(os.listdir(dir_path)) - missing_files = set(expected_files) - actual_files - assert not missing_files, f"Missing expected files: {missing_files}" From 0e1c94c0d8f17388fe8abf755ee7f4a21a74f3e0 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Sat, 18 Jan 2025 19:27:01 +0000 Subject: [PATCH 09/22] Remove integration workflow Signed-off-by: Andrey Velichkevich --- .github/workflows/integration-tests.yaml | 106 ----------------------- 1 file changed, 106 deletions(-) delete mode 100644 .github/workflows/integration-tests.yaml diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml deleted file mode 100644 index 47a17627e9..0000000000 --- a/.github/workflows/integration-tests.yaml +++ /dev/null @@ -1,106 +0,0 @@ -name: integration test -on: - - pull_request - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - integration-test: - runs-on: ubuntu-latest - - # Almost similar to the following: - # - # ```yaml - # strategy: - # fail-fast: false - # matrix: - # kubernetes-version: ["v1.27.11", "v1.28.7", "v1.29.2"] - # gang-scheduler-name: ["none", "scheduler-plugins", "volcano"] - # ``` - # The difference is that each combination is randomly assigned various Python versions - # to verify Python SDK operations. - strategy: - fail-fast: false - matrix: - # TODO (tenzen-y): Add volcano. 
- include: - - kubernetes-version: v1.30.6 - gang-scheduler-name: "none" - python-version: "3.10" - - kubernetes-version: v1.29.2 - gang-scheduler-name: "none" - python-version: "3.10" - - kubernetes-version: v1.28.7 - gang-scheduler-name: "none" - python-version: "3.8" - - kubernetes-version: v1.30.6 - gang-scheduler-name: "scheduler-plugins" - python-version: "3.10" - - kubernetes-version: v1.29.2 - gang-scheduler-name: "scheduler-plugins" - python-version: "3.9" - - kubernetes-version: v1.28.7 - gang-scheduler-name: "scheduler-plugins" - python-version: "3.11" - - kubernetes-version: v1.30.6 - gang-scheduler-name: "volcano" - python-version: "3.10" - - kubernetes-version: v1.29.2 - gang-scheduler-name: "volcano" - python-version: "3.8" - - kubernetes-version: v1.28.7 - gang-scheduler-name: "volcano" - python-version: "3.10" - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup E2E Tests - uses: ./.github/workflows/setup-e2e-test - with: - kubernetes-version: ${{ matrix.kubernetes-version }} - python-version: ${{ matrix.python-version }} - gang-scheduler-name: ${{ matrix.gang-scheduler-name }} - - - name: Build JAX Job Example Image - run: | - ./scripts/gha/build-jax-mnist-image.sh - env: - JAX_JOB_CI_IMAGE: kubeflow/jaxjob-dist-spmd-mnist:test - - - name: Load JAX Job Example Image - run: | - kind load docker-image ${{ env.JAX_JOB_CI_IMAGE }} --name ${{ env.KIND_CLUSTER }} - env: - KIND_CLUSTER: training-operator-cluster - JAX_JOB_CI_IMAGE: kubeflow/jaxjob-dist-spmd-mnist:test - - - name: Run tests - run: | - pip install pytest - python3 -m pip install -e sdk/python; pytest -s sdk/python/test/e2e --log-cli-level=debug --namespace=default - env: - GANG_SCHEDULER_NAME: ${{ matrix.gang-scheduler-name }} - JAX_JOB_IMAGE: kubeflow/jaxjob-dist-spmd-mnist:test - - - name: Run initializer_v2 integration tests for Python 3.11+ - if: ${{ matrix.python-version == '3.11' }} - run: | - pip install -r ./cmd/initializer_v2/dataset/requirements.txt - pip 
install -U './sdk_v2' - pytest ./test/integration/initializer_v2 - - - name: Collect volcano logs - if: ${{ failure() && matrix.gang-scheduler-name == 'volcano' }} - run: | - echo "dump volcano-scheduler logs..." - kubectl logs -n volcano-system -l app=volcano-scheduler --tail=-1 - echo "dump volcano-admission logs..." - kubectl logs -n volcano-system -l app=volcano-admission --tail=-1 - echo "dump volcano-controllers logs..." - kubectl logs -n volcano-system -l app=volcano-controller --tail=-1 - echo "dump podgroups description..." - kubectl describe podgroups.scheduling.volcano.sh -A From 62fc59e00834d31b69dfddde366d96253d15ba03 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Sat, 18 Jan 2025 19:29:07 +0000 Subject: [PATCH 10/22] Install SDK in Python integration tests Signed-off-by: Andrey Velichkevich --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 7ad56ba11f..b5eda4846c 100644 --- a/Makefile +++ b/Makefile @@ -112,6 +112,7 @@ test-python: ## Run Python unit test. 
export PYTHONPATH=$(PROJECT_DIR) pip install pytest pip install -r ./cmd/initializer_v2/dataset/requirements.txt + pip install ./sdk_v2 pytest ./pkg/initializer_v2/dataset pytest ./pkg/initializer_v2/model From d04ef6fb8b2a5f8a4883ac9465f4e9215f9f0aa5 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Sun, 19 Jan 2025 23:58:45 +0000 Subject: [PATCH 11/22] Remove generate file Signed-off-by: Andrey Velichkevich --- generate | 1 - 1 file changed, 1 deletion(-) delete mode 100644 generate diff --git a/generate b/generate deleted file mode 100644 index 9daeafb986..0000000000 --- a/generate +++ /dev/null @@ -1 +0,0 @@ -test From f8cca940a2fc88153b2c8f21c825207ad42d4b32 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Mon, 27 Jan 2025 13:48:54 +0000 Subject: [PATCH 12/22] Add Kubeflow Trainer logo Signed-off-by: Andrey Velichkevich --- README.md | 7 ++++--- docs/images/trainer-logo.svg | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 docs/images/trainer-logo.svg diff --git a/README.md b/README.md index 58c58b93a3..8233ea9018 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,10 @@ [![Coverage Status](https://coveralls.io/repos/github/kubeflow/training-operator/badge.svg?branch=master)](https://coveralls.io/github/kubeflow/training-operator?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/kubeflow/training-operator)](https://goreportcard.com/report/github.com/kubeflow/training-operator) -TODO (andreyvelich): Add logo - -TODO (andreyvelich): Add diagram +

+ logo +
+

## Overview diff --git a/docs/images/trainer-logo.svg b/docs/images/trainer-logo.svg new file mode 100644 index 0000000000..662e8325ed --- /dev/null +++ b/docs/images/trainer-logo.svg @@ -0,0 +1 @@ + \ No newline at end of file From 9701aad87d04bec9609107fbfd0b95fa5c691067 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Mon, 27 Jan 2025 14:50:41 +0000 Subject: [PATCH 13/22] Add tech diagram Signed-off-by: Andrey Velichkevich --- README.md | 5 +++++ docs/images/trainer-tech-stack.drawio.svg | 4 ++++ 2 files changed, 9 insertions(+) create mode 100644 docs/images/trainer-tech-stack.drawio.svg diff --git a/README.md b/README.md index 8233ea9018..a2e88ab0df 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,11 @@ with Kubeflow Training to orchestrate their ML training on Kubernetes. Kubeflow Trainer allows you effortlessly develop your LLMs with the Kubeflow Python SDK and build Kubernetes-native Training Runtimes with Kubernetes Custom Resources APIs. +

+ logo +
+

+ ## Kubeflow Trainer Introduction The following KubeCon + CloudNativeCon 2024 talk provides an overview of Kubeflow Trainer capabilities: diff --git a/docs/images/trainer-tech-stack.drawio.svg b/docs/images/trainer-tech-stack.drawio.svg new file mode 100644 index 0000000000..08c2eb9d1c --- /dev/null +++ b/docs/images/trainer-tech-stack.drawio.svg @@ -0,0 +1,4 @@ + + + + generated by pstoedit version:3.44 from NVBadge_2D.eps JAX Light Stroke
Local
Self Hosted
MPI-style Training
LLM Blueprints
LLM Fine-Tuning
Elastic Training
Gang-Scheduling
Multi-Node Training
\ No newline at end of file From dbda6ee2b73aec323f23567e68e5a22db6f309fe Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Mon, 27 Jan 2025 15:00:41 +0000 Subject: [PATCH 14/22] Exclude images from pre-commit Signed-off-by: Andrey Velichkevich --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 101c63ece8..1451290c28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,6 +23,7 @@ repos: - id: flake8 exclude: | (?x)^( + docs/images/.*| pkg/client/.*| sdk_v2/kubeflow/training/[^/]*.py| sdk_v2/kubeflow/training/models/.*| From 5b477f840925efc45fd1c440cc4f686515b0d007 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Mon, 27 Jan 2025 16:46:26 +0000 Subject: [PATCH 15/22] Fix git diff in CI Signed-off-by: Andrey Velichkevich --- .github/workflows/test-go.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index b5b7db0dd8..10b4a85209 100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -27,20 +27,20 @@ jobs: - name: Check Go modules run: | - go mod tidy && - git diff --cached --exit-code || (echo 'Please run "go mod tidy" to sync Go modules' && exit 1); + go mod tidy + git diff --exit-code || (echo 'Please run "go mod tidy" to sync Go modules' && exit 1); - name: Check auto-generated assets run: | - make generate && - git diff --cached --exit-code || (echo 'Please run "make generate" to generate assets' && exit 1); + make generate + git diff --exit-code || (echo 'Please run "make generate" to generate assets' && exit 1); - name: Check go fmt run: | - make fmt && - git diff --cached --exit-code || (echo 'Please run "make fmt" to verify gofmt' && exit 1); + make fmt + git diff --exit-code || (echo 'Please run "make fmt" to verify gofmt' && exit 1); - name: Check go fmt run: | make vet - git diff --cached --exit-code || (echo 
'Please run "make vet" to verify govet' && exit 1); + git diff --exit-code || (echo 'Please run "make vet" to verify govet' && exit 1); - name: Check golangci lint run: | make golangci-lint From 03d34eba268a521bdf49be709611b701d422bab5 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Mon, 27 Jan 2025 16:47:40 +0000 Subject: [PATCH 16/22] Run go mod tidy Signed-off-by: Andrey Velichkevich --- go.sum | 1 - 1 file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index 2511b03550..df2f3ee8dd 100644 --- a/go.sum +++ b/go.sum @@ -137,7 +137,6 @@ golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= From 9fa648fd81ad4f31575b293c6ca1f5aca3754713 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Tue, 28 Jan 2025 21:41:36 +0000 Subject: [PATCH 17/22] Update diagram Signed-off-by: Andrey Velichkevich --- docs/images/trainer-tech-stack.drawio.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/images/trainer-tech-stack.drawio.svg b/docs/images/trainer-tech-stack.drawio.svg index 08c2eb9d1c..d49ec9b9c8 100644 --- a/docs/images/trainer-tech-stack.drawio.svg +++ b/docs/images/trainer-tech-stack.drawio.svg @@ -1,4 +1,4 @@ - generated by pstoedit version:3.44 from NVBadge_2D.eps JAX Light Stroke
Local
Self Hosted
MPI-style Training
LLM Blueprints
LLM Fine-Tuning
Elastic Training
Gang-Scheduling
Multi-Node Training
\ No newline at end of file + generated by pstoedit version:3.44 from NVBadge_2D.eps JAX Light Stroke
MPI-style Training
LLM Blueprints
LLM Fine-Tuning
Elastic Training
Gang-Scheduling
Multi-Node Training
Local
Self Hosted
\ No newline at end of file From 3828d04dfc979dd7502cd2167df3e49e2e33d452 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Thu, 30 Jan 2025 01:02:57 +0000 Subject: [PATCH 18/22] Cleanup examples Signed-off-by: Andrey Velichkevich --- examples/jax/jax-dist-spmd-mnist/Dockerfile | 29 --- examples/jax/jax-dist-spmd-mnist/README.md | 132 -------------- examples/jax/jax-dist-spmd-mnist/datasets.py | 97 ---------- .../jaxjob_dist_spmd_mnist_gloo.yaml | 16 -- .../spmd_mnist_classifier_fromscratch.py | 171 ------------------ 5 files changed, 445 deletions(-) delete mode 100644 examples/jax/jax-dist-spmd-mnist/Dockerfile delete mode 100644 examples/jax/jax-dist-spmd-mnist/README.md delete mode 100644 examples/jax/jax-dist-spmd-mnist/datasets.py delete mode 100644 examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml delete mode 100644 examples/jax/jax-dist-spmd-mnist/spmd_mnist_classifier_fromscratch.py diff --git a/examples/jax/jax-dist-spmd-mnist/Dockerfile b/examples/jax/jax-dist-spmd-mnist/Dockerfile deleted file mode 100644 index 1538d26507..0000000000 --- a/examples/jax/jax-dist-spmd-mnist/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -FROM python:3.13 - -RUN pip install --upgrade pip -RUN pip install --upgrade jax[k8s] absl-py - -RUN apt-get update && apt-get install -y \ - build-essential \ - cmake \ - git \ - libgoogle-glog-dev \ - libgflags-dev \ - libprotobuf-dev \ - protobuf-compiler \ - && rm -rf /var/lib/apt/lists/* - -RUN git clone https://github.com/facebookincubator/gloo.git \ - && cd gloo \ - && git checkout 43b7acbf372cdce14075f3526e39153b7e433b53 \ - && mkdir build \ - && cd build \ - && cmake ../ \ - && make \ - && make install - -WORKDIR /app - -ADD datasets.py spmd_mnist_classifier_fromscratch.py /app/ - -ENTRYPOINT ["python3", "spmd_mnist_classifier_fromscratch.py"] diff --git a/examples/jax/jax-dist-spmd-mnist/README.md b/examples/jax/jax-dist-spmd-mnist/README.md deleted file mode 100644 index d57a4d80fc..0000000000 --- 
a/examples/jax/jax-dist-spmd-mnist/README.md +++ /dev/null @@ -1,132 +0,0 @@ -## An MNIST example with single-program multiple-data (SPMD) data parallelism. - -The aim here is to illustrate how to use JAX's [`pmap`](https://jax.readthedocs.io/en/latest/_autosummary/jax.pmap.html) to express and execute -[SPMD](https://jax.readthedocs.io/en/latest/glossary.html#term-SPMD) programs for data parallelism along a batch dimension, while also -minimizing dependencies by avoiding the use of higher-level layers and -optimizers libraries. - -Adapted from https://github.com/jax-ml/jax/blob/main/examples/spmd_mnist_classifier_fromscratch.py. - -```bash -$ kubectl apply -f examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml -``` - ---- - -```bash -$ kubectl get pods -n kubeflow -l training.kubeflow.org/job-name=jaxjob-mnist -``` - -``` -NAME READY STATUS RESTARTS AGE -jaxjob-mnist-worker-0 0/1 Completed 0 108m -jaxjob-mnist-worker-1 0/1 Completed 0 108m -``` - ---- -```bash -$ PODNAME=$(kubectl get pods -l training.kubeflow.org/job-name=jaxjob-simple,training.kubeflow.org/replica-type=worker,training.kubeflow.org/replica-index=0 -o name -n kubeflow) -$ kubectl logs -f ${PODNAME} -n kubeflow -``` - -``` -downloaded https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz to /tmp/jax_example_data/ -downloaded https://storage.googleapis.com/cvdf-datasets/mnist/train-labels-idx1-ubyte.gz to /tmp/jax_example_data/ -downloaded https://storage.googleapis.com/cvdf-datasets/mnist/t10k-images-idx3-ubyte.gz to /tmp/jax_example_data/ -downloaded https://storage.googleapis.com/cvdf-datasets/mnist/t10k-labels-idx1-ubyte.gz to /tmp/jax_example_data/ -JAX global devices:[CpuDevice(id=0), CpuDevice(id=1), CpuDevice(id=2), CpuDevice(id=3), CpuDevice(id=4), CpuDevice(id=5), CpuDevice(id=6), CpuDevice(id=7), CpuDevice(id=131072), CpuDevice(id=131073), CpuDevice(id=131074), CpuDevice(id=131075), CpuDevice(id=131076), CpuDevice(id=131077), CpuDevice(id=131078), 
CpuDevice(id=131079)] -JAX local devices:[CpuDevice(id=0), CpuDevice(id=1), CpuDevice(id=2), CpuDevice(id=3), CpuDevice(id=4), CpuDevice(id=5), CpuDevice(id=6), CpuDevice(id=7)] -JAX device count:16 -JAX local device count:8 -JAX process count:2 -Epoch 0 in 1809.25 sec -Training set accuracy 0.09871666878461838 -Test set accuracy 0.09799999743700027 -Epoch 1 in 0.51 sec -Training set accuracy 0.09871666878461838 -Test set accuracy 0.09799999743700027 -Epoch 2 in 0.69 sec -Training set accuracy 0.09871666878461838 -Test set accuracy 0.09799999743700027 -Epoch 3 in 0.81 sec -Training set accuracy 0.09871666878461838 -Test set accuracy 0.09799999743700027 -Epoch 4 in 0.91 sec -Training set accuracy 0.09871666878461838 -Test set accuracy 0.09799999743700027 -Epoch 5 in 0.97 sec -Training set accuracy 0.09871666878461838 -Test set accuracy 0.09799999743700027 -Epoch 6 in 1.12 sec -Training set accuracy 0.09035000205039978 -Test set accuracy 0.08919999748468399 -Epoch 7 in 1.11 sec -Training set accuracy 0.09871666878461838 -Test set accuracy 0.09799999743700027 -Epoch 8 in 1.21 sec -Training set accuracy 0.09871666878461838 -Test set accuracy 0.09799999743700027 -Epoch 9 in 1.29 sec -Training set accuracy 0.09871666878461838 -Test set accuracy 0.09799999743700027 - -``` - ---- - -```bash -$ kubectl get -o yaml jaxjobs jaxjob-mnist -n kubeflow -``` - -``` -apiVersion: kubeflow.org/v1 -kind: JAXJob -metadata: - annotations: - kubectl.kubernetes.io/last-applied-configuration: | - {"apiVersion":"kubeflow.org/v1","kind":"JAXJob","metadata":{"annotations":{},"name":"jaxjob-mnist","namespace":"kubeflow"},"spec":{"jaxReplicaSpecs":{"Worker":{"replicas":2,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"image":"docker.io/sandipanify/jaxjob-spmd-mnist:latest","imagePullPolicy":"Always","name":"jax"}]}}}}}} - creationTimestamp: "2024-12-18T16:47:28Z" - generation: 1 - name: jaxjob-mnist - namespace: kubeflow - resourceVersion: "3620" - uid: 
15f1db77-3326-405d-95e6-3d9a0d581611 -spec: - jaxReplicaSpecs: - Worker: - replicas: 2 - restartPolicy: OnFailure - template: - spec: - containers: - - image: docker.io/sandipanify/jaxjob-spmd-mnist:latest - imagePullPolicy: Always - name: jax -status: - completionTime: "2024-12-18T17:22:11Z" - conditions: - - lastTransitionTime: "2024-12-18T16:47:28Z" - lastUpdateTime: "2024-12-18T16:47:28Z" - message: JAXJob jaxjob-mnist is created. - reason: JAXJobCreated - status: "True" - type: Created - - lastTransitionTime: "2024-12-18T16:50:57Z" - lastUpdateTime: "2024-12-18T16:50:57Z" - message: JAXJob kubeflow/jaxjob-mnist is running. - reason: JAXJobRunning - status: "False" - type: Running - - lastTransitionTime: "2024-12-18T17:22:11Z" - lastUpdateTime: "2024-12-18T17:22:11Z" - message: JAXJob kubeflow/jaxjob-mnist successfully completed. - reason: JAXJobSucceeded - status: "True" - type: Succeeded - replicaStatuses: - Worker: - selector: training.kubeflow.org/job-name=jaxjob-mnist,training.kubeflow.org/operator-name=jaxjob-controller,training.kubeflow.org/replica-type=worker - succeeded: 2 - startTime: "2024-12-18T16:47:28Z" - -``` diff --git a/examples/jax/jax-dist-spmd-mnist/datasets.py b/examples/jax/jax-dist-spmd-mnist/datasets.py deleted file mode 100644 index 60fb8ce25b..0000000000 --- a/examples/jax/jax-dist-spmd-mnist/datasets.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2018 The JAX Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Datasets used in examples.""" - - -import array -import gzip -import os -import struct -import urllib.request -from os import path - -import numpy as np - -_DATA = "/tmp/jax_example_data/" - - -def _download(url, filename): - """Download a url to a file in the JAX data temp directory.""" - if not path.exists(_DATA): - os.makedirs(_DATA) - out_file = path.join(_DATA, filename) - if not path.isfile(out_file): - urllib.request.urlretrieve(url, out_file) - print(f"downloaded {url} to {_DATA}") - - -def _partial_flatten(x): - """Flatten all but the first dimension of an ndarray.""" - return np.reshape(x, (x.shape[0], -1)) - - -def _one_hot(x, k, dtype=np.float32): - """Create a one-hot encoding of x of size k.""" - return np.array(x[:, None] == np.arange(k), dtype) - - -def mnist_raw(): - """Download and parse the raw MNIST dataset.""" - # CVDF mirror of http://yann.lecun.com/exdb/mnist/ - base_url = "https://storage.googleapis.com/cvdf-datasets/mnist/" - - def parse_labels(filename): - with gzip.open(filename, "rb") as fh: - _ = struct.unpack(">II", fh.read(8)) - return np.array(array.array("B", fh.read()), dtype=np.uint8) - - def parse_images(filename): - with gzip.open(filename, "rb") as fh: - _, num_data, rows, cols = struct.unpack(">IIII", fh.read(16)) - return np.array(array.array("B", fh.read()), dtype=np.uint8).reshape( - num_data, rows, cols - ) - - for filename in [ - "train-images-idx3-ubyte.gz", - "train-labels-idx1-ubyte.gz", - "t10k-images-idx3-ubyte.gz", - "t10k-labels-idx1-ubyte.gz", - ]: - _download(base_url + filename, filename) - - train_images = parse_images(path.join(_DATA, "train-images-idx3-ubyte.gz")) - train_labels = parse_labels(path.join(_DATA, "train-labels-idx1-ubyte.gz")) - test_images = parse_images(path.join(_DATA, "t10k-images-idx3-ubyte.gz")) - test_labels = parse_labels(path.join(_DATA, "t10k-labels-idx1-ubyte.gz")) - - return train_images, train_labels, test_images, test_labels - - -def mnist(permute_train=False): - """Download, 
parse and process MNIST data to unit scale and one-hot labels.""" - train_images, train_labels, test_images, test_labels = mnist_raw() - - train_images = _partial_flatten(train_images) / np.float32(255.0) - test_images = _partial_flatten(test_images) / np.float32(255.0) - train_labels = _one_hot(train_labels, 10) - test_labels = _one_hot(test_labels, 10) - - if permute_train: - perm = np.random.RandomState(0).permutation(train_images.shape[0]) - train_images = train_images[perm] - train_labels = train_labels[perm] - - return train_images, train_labels, test_images, test_labels diff --git a/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml b/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml deleted file mode 100644 index e124b2efef..0000000000 --- a/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: JAXJob -metadata: - name: jaxjob-mnist - namespace: kubeflow -spec: - jaxReplicaSpecs: - Worker: - replicas: 2 - restartPolicy: OnFailure - template: - spec: - containers: - - name: jax - image: docker.io/kubeflow/jaxjob-dist-spmd-mnist:latest - imagePullPolicy: Always diff --git a/examples/jax/jax-dist-spmd-mnist/spmd_mnist_classifier_fromscratch.py b/examples/jax/jax-dist-spmd-mnist/spmd_mnist_classifier_fromscratch.py deleted file mode 100644 index ca0e9f5165..0000000000 --- a/examples/jax/jax-dist-spmd-mnist/spmd_mnist_classifier_fromscratch.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright 2024 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""An MNIST example with single-program multiple-data (SPMD) data parallelism. - -The aim here is to illustrate how to use JAX's `pmap` to express and execute -SPMD programs for data parallelism along a batch dimension, while also -minimizing dependencies by avoiding the use of higher-level layers and -optimizers libraries. -""" - -import multiprocessing -import os -import time -from functools import partial - -import numpy as np -import numpy.random as npr - -# JAX will treat your CPU as a single device by default, regardless of the number -# of cores available. Unfortunately, this means that using `pmap` is not possible out -# of the box – we’ll first need to instruct JAX to split the CPU into multiple devices. -# This variable has to be set before JAX or any library that imports it is imported - -os.environ["XLA_FLAGS"] = "--xla_force_host_platform_device_count={}".format( - multiprocessing.cpu_count() -) - -import datasets # noqa -import jax # noqa -import jax.numpy as jnp # noqa -from jax import grad, jit, lax, pmap # noqa -from jax.scipy.special import logsumexp # noqa -from jax.tree_util import tree_map # noqa - -jax.config.update("jax_cpu_collectives_implementation", "gloo") - -process_id = int(os.getenv("PROCESS_ID")) -num_processes = int(os.getenv("NUM_PROCESSES")) -coordinator_address = ( - f"{os.getenv('COORDINATOR_ADDRESS')}:{int(os.getenv('COORDINATOR_PORT'))}" -) - -jax.distributed.initialize( - coordinator_address=coordinator_address, - num_processes=num_processes, - process_id=process_id, -) - - -def init_random_params(scale, layer_sizes, rng=npr.RandomState(0)): - return [ - (scale * rng.randn(m, n), scale * rng.randn(n)) - for m, n, in zip(layer_sizes[:-1], layer_sizes[1:]) - ] - - -def predict(params, inputs): - activations = inputs - for w, b in params[:-1]: - outputs = jnp.dot(activations, w) + b - activations = jnp.tanh(outputs) - - 
final_w, final_b = params[-1] - logits = jnp.dot(activations, final_w) + final_b - return logits - logsumexp(logits, axis=1, keepdims=True) - - -def loss(params, batch): - inputs, targets = batch - preds = predict(params, inputs) - return -jnp.mean(jnp.sum(preds * targets, axis=1)) - - -@jit -def accuracy(params, batch): - inputs, targets = batch - target_class = jnp.argmax(targets, axis=1) - predicted_class = jnp.argmax(predict(params, inputs), axis=1) - return jnp.mean(predicted_class == target_class) - - -if __name__ == "__main__": - layer_sizes = [784, 1024, 1024, 10] - param_scale = 0.1 - step_size = 0.001 - num_epochs = 10 - # For this manual SPMD example, we get the number of devices (e.g. CPU, - # GPUs or TPU cores) that we're using, and use it to reshape data minibatches. - num_devices = jax.local_device_count() - batch_size = num_devices * 5 - - train_images, train_labels, test_images, test_labels = datasets.mnist() - num_train = train_images.shape[0] - num_complete_batches, leftover = divmod(num_train, batch_size) - - # Increasing number of batches requires more resources. - num_batches = 10 - - def data_stream(): - rng = npr.RandomState(0) - while True: - perm = rng.permutation(num_train) - for i in range(num_batches): - batch_idx = perm[i * batch_size : (i + 1) * batch_size] # noqa - images, labels = train_images[batch_idx], train_labels[batch_idx] - # For this SPMD example, we reshape the data batch dimension into two - # batch dimensions, one of which is mapped over parallel devices. - batch_size_per_device, ragged = divmod(images.shape[0], num_devices) - if ragged: - msg = "batch size must be divisible by device count, got {} and {}." 
- raise ValueError(msg.format(batch_size, num_devices)) - shape_prefix = (num_devices, batch_size_per_device) - images = images.reshape(shape_prefix + images.shape[1:]) - labels = labels.reshape(shape_prefix + labels.shape[1:]) - yield images, labels - - batches = data_stream() - - @partial(pmap, axis_name="batch") - def spmd_update(params, batch): - grads = grad(loss)(params, batch) - # We compute the total gradients, summing across the device-mapped axis, - # using the `lax.psum` SPMD primitive, which does a fast all-reduce-sum. - grads = [(lax.psum(dw, "batch"), lax.psum(db, "batch")) for dw, db in grads] - return [ - (w - step_size * dw, b - step_size * db) - for (w, b), (dw, db) in zip(params, grads) - ] - - # We replicate the parameters so that the constituent arrays have a leading - # dimension of size equal to the number of devices we're pmapping over. - init_params = init_random_params(param_scale, layer_sizes) - - def replicate_array(x): - return np.broadcast_to(x, (num_devices,) + x.shape) - - replicated_params = tree_map(replicate_array, init_params) - - print(f"JAX global devices:{jax.devices()}") - print(f"JAX local devices:{jax.local_devices()}") - - print(f"JAX device count:{jax.device_count()}") - print(f"JAX local device count:{jax.local_device_count()}") - print(f"JAX process count:{jax.process_count()}") - - for epoch in range(num_epochs): - start_time = time.time() - for _ in range(num_batches): - replicated_params = spmd_update(replicated_params, next(batches)) - epoch_time = time.time() - start_time - - # We evaluate using the jitted `accuracy` function (not using pmap) by - # grabbing just one of the replicated parameter values. 
- params = tree_map(lambda x: x[0], replicated_params) - train_acc = accuracy(params, (train_images, train_labels)) - test_acc = accuracy(params, (test_images, test_labels)) - print(f"Epoch {epoch} in {epoch_time:0.2f} sec") - print(f"Training set accuracy {train_acc}") - print(f"Test set accuracy {test_acc}") From 9b9b5c9df4743e8149c1e34b74ebe1f002cae269 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Tue, 4 Feb 2025 12:34:22 +0000 Subject: [PATCH 19/22] Always install Go bins Signed-off-by: Andrey Velichkevich --- .gitignore | 1 + Makefile | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index f50b5f2538..9ef0e958f6 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ __debug_bin # Python cache files __pycache__/ +*.egg-info/ # OpenAPI Generator CLI JAR file hack/python-sdk-v2/openapi-generator-cli.jar diff --git a/Makefile b/Makefile index b5eda4846c..dcfa0507a5 100644 --- a/Makefile +++ b/Makefile @@ -41,11 +41,11 @@ ENVTEST_K8S_VERSION ?= 1.31 # Instructions to download tools for development. .PHONY: envtest envtest: ## Download the setup-envtest binary if required. - test -s $(ENVTEST) || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@release-0.19 + GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@release-0.19 .PHONY: controller-gen controller-gen: ## Download the controller-gen binary if required. - test -s $(CONTROLLER_GEN) || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.16.5 + GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.16.5 # Download external CRDs for Go integration testings. 
EXTERNAL_CRDS_DIR ?= $(PROJECT_DIR)/manifests/external-crds From 136f3e358f592f7baf7ab2f3ecab4ab59e59a010 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Tue, 4 Feb 2025 12:41:14 +0000 Subject: [PATCH 20/22] Update diagram Signed-off-by: Andrey Velichkevich --- docs/images/trainer-tech-stack.drawio.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/images/trainer-tech-stack.drawio.svg b/docs/images/trainer-tech-stack.drawio.svg index d49ec9b9c8..8d7c2ec335 100644 --- a/docs/images/trainer-tech-stack.drawio.svg +++ b/docs/images/trainer-tech-stack.drawio.svg @@ -1,4 +1,4 @@ - generated by pstoedit version:3.44 from NVBadge_2D.eps JAX Light Stroke
MPI-style Training
LLM Blueprints
LLM Fine-Tuning
Elastic Training
Gang-Scheduling
Multi-Node Training
Local
Self Hosted
\ No newline at end of file + generated by pstoedit version:3.44 from NVBadge_2D.eps JAX Light Stroke
MPI-style Training
LLM Blueprints
LLM Fine-Tuning
Elastic Training
Gang-Scheduling
Multi-Node Training
Local
Self Hosted
\ No newline at end of file From f121251f680906af4b3e6f89dccc9f12c29afc7d Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Tue, 4 Feb 2025 12:46:46 +0000 Subject: [PATCH 21/22] Update hack Remove manifests for JobSet Rename to tools.go Signed-off-by: Andrey Velichkevich --- hack/e2e-run-notebook.sh | 2 +- hack/e2e-setup-cluster.sh | 4 +- hack/python-sdk-v2/gen-sdk.sh | 2 +- hack/swagger-v2/{main.go => tools.go} | 0 hack/update-codegen.sh | 4 +- .../v2/third-party/jobset/manifests.yaml | 130 ------------------ 6 files changed, 6 insertions(+), 136 deletions(-) rename hack/swagger-v2/{main.go => tools.go} (100%) delete mode 100644 manifests/v2/third-party/jobset/manifests.yaml diff --git a/hack/e2e-run-notebook.sh b/hack/e2e-run-notebook.sh index 42bef13a1e..00d446a4f8 100755 --- a/hack/e2e-run-notebook.sh +++ b/hack/e2e-run-notebook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2024 The Kubeflow Authors. # diff --git a/hack/e2e-setup-cluster.sh b/hack/e2e-setup-cluster.sh index 78fdfe687a..1e1e00fcbe 100755 --- a/hack/e2e-setup-cluster.sh +++ b/hack/e2e-setup-cluster.sh @@ -1,6 +1,6 @@ -#!/bin/bash +#!/usr/bin/env bash -# Copyright 2021 The Kubernetes Authors. +# Copyright 2024 The Kubeflow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/hack/python-sdk-v2/gen-sdk.sh b/hack/python-sdk-v2/gen-sdk.sh index 00f5a75adb..be043eebe6 100755 --- a/hack/python-sdk-v2/gen-sdk.sh +++ b/hack/python-sdk-v2/gen-sdk.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2024 The Kubeflow Authors. 
# diff --git a/hack/swagger-v2/main.go b/hack/swagger-v2/tools.go similarity index 100% rename from hack/swagger-v2/main.go rename to hack/swagger-v2/tools.go diff --git a/hack/update-codegen.sh b/hack/update-codegen.sh index 5d09fc4491..4b08581d9a 100755 --- a/hack/update-codegen.sh +++ b/hack/update-codegen.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2024 The Kubeflow Authors. # @@ -63,4 +63,4 @@ go run ${OPENAPI_PKG}/cmd/openapi-gen \ # Generating OpenAPI Swagger for Kubeflow Trainer V2. echo "Generate OpenAPI Swagger for kubeflow.org/v2alpha1" -go run hack/swagger-v2/main.go >api.v2/openapi-spec/swagger.json +go run hack/swagger-v2/tools.go >api.v2/openapi-spec/swagger.json diff --git a/manifests/v2/third-party/jobset/manifests.yaml b/manifests/v2/third-party/jobset/manifests.yaml deleted file mode 100644 index df7f413391..0000000000 --- a/manifests/v2/third-party/jobset/manifests.yaml +++ /dev/null @@ -1,130 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - labels: - app.kubernetes.io/component: manager - app.kubernetes.io/created-by: jobset - app.kubernetes.io/instance: system - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/name: namespace - app.kubernetes.io/part-of: jobset - control-plane: controller-manager - name: jobset-system ---- -apiVersion: v1 -kind: Service -metadata: - labels: - app.kubernetes.io/component: webhook - app.kubernetes.io/created-by: jobset - app.kubernetes.io/instance: webhook-service - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/name: service - app.kubernetes.io/part-of: jobset - name: jobset-webhook-service - namespace: jobset-system -spec: - ports: - - port: 443 - protocol: TCP - targetPort: 9443 - selector: - control-plane: controller-manager ---- -apiVersion: admissionregistration.k8s.io/v1 -kind: MutatingWebhookConfiguration -metadata: - name: jobset-mutating-webhook-configuration -webhooks: - - admissionReviewVersions: - - v1 - clientConfig: - service: - name: 
jobset-webhook-service - namespace: jobset-system - path: /mutate-jobset-x-k8s-io-v1alpha2-jobset - failurePolicy: Fail - name: mjobset.kb.io - rules: - - apiGroups: - - jobset.x-k8s.io - apiVersions: - - v1alpha2 - operations: - - CREATE - - UPDATE - resources: - - jobsets - sideEffects: None - - admissionReviewVersions: - - v1 - clientConfig: - service: - name: jobset-webhook-service - namespace: jobset-system - path: /mutate--v1-pod - failurePolicy: Fail - name: mpod.kb.io - objectSelector: - matchExpressions: - - key: jobset.sigs.k8s.io/jobset-name - operator: Exists - rules: - - apiGroups: - - "" - apiVersions: - - v1 - operations: - - CREATE - resources: - - pods - sideEffects: None ---- -apiVersion: admissionregistration.k8s.io/v1 -kind: ValidatingWebhookConfiguration -metadata: - name: jobset-validating-webhook-configuration -webhooks: - - admissionReviewVersions: - - v1 - clientConfig: - service: - name: jobset-webhook-service - namespace: jobset-system - path: /validate-jobset-x-k8s-io-v1alpha2-jobset - failurePolicy: Fail - name: vjobset.kb.io - rules: - - apiGroups: - - jobset.x-k8s.io - apiVersions: - - v1alpha2 - operations: - - CREATE - - UPDATE - resources: - - jobsets - sideEffects: None - - admissionReviewVersions: - - v1 - clientConfig: - service: - name: jobset-webhook-service - namespace: jobset-system - path: /validate--v1-pod - failurePolicy: Fail - name: vpod.kb.io - objectSelector: - matchExpressions: - - key: jobset.sigs.k8s.io/jobset-name - operator: Exists - rules: - - apiGroups: - - "" - apiVersions: - - v1 - operations: - - CREATE - resources: - - pods - sideEffects: None From c5eafbc6ddb337c88aeae13c15998236335e209a Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Tue, 4 Feb 2025 14:22:14 +0000 Subject: [PATCH 22/22] Create tools.go Signed-off-by: Andrey Velichkevich --- hack/swagger-v2/{tools.go => main.go} | 2 -- hack/tools.go | 21 +++++++++++++++++++++ hack/update-codegen.sh | 2 +- 3 files changed, 22 insertions(+), 3 
deletions(-) rename hack/swagger-v2/{tools.go => main.go} (98%) create mode 100644 hack/tools.go diff --git a/hack/swagger-v2/tools.go b/hack/swagger-v2/main.go similarity index 98% rename from hack/swagger-v2/tools.go rename to hack/swagger-v2/main.go index e4f7230ae4..069656a0ea 100644 --- a/hack/swagger-v2/tools.go +++ b/hack/swagger-v2/main.go @@ -25,8 +25,6 @@ import ( "k8s.io/kube-openapi/pkg/common" "k8s.io/kube-openapi/pkg/validation/spec" - _ "k8s.io/code-generator" - kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" ) diff --git a/hack/tools.go b/hack/tools.go new file mode 100644 index 0000000000..31af86162f --- /dev/null +++ b/hack/tools.go @@ -0,0 +1,21 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tools + +import ( + _ "k8s.io/code-generator" +) diff --git a/hack/update-codegen.sh b/hack/update-codegen.sh index 4b08581d9a..df640f1ed7 100755 --- a/hack/update-codegen.sh +++ b/hack/update-codegen.sh @@ -63,4 +63,4 @@ go run ${OPENAPI_PKG}/cmd/openapi-gen \ # Generating OpenAPI Swagger for Kubeflow Trainer V2. echo "Generate OpenAPI Swagger for kubeflow.org/v2alpha1" -go run hack/swagger-v2/tools.go >api.v2/openapi-spec/swagger.json +go run hack/swagger-v2/main.go >api.v2/openapi-spec/swagger.json