Merge branch 'main' into token-useage

envoyproxy · Jan 18, 2025 · 9a4ad44 · 9a4ad44
2 parents 5843f1e + 85fc9d3
commit 9a4ad44
Show file tree

Hide file tree

Showing 31 changed files with 695 additions and 108 deletions.
diff --git a/.github/workflows/commit.yaml b/.github/workflows/commit.yaml
@@ -108,12 +108,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-        if: github.event.pull_request.head.repo.fork == false
+        if: github.event_name != 'pull_request_target'
       - uses: actions/checkout@v4
         with:
           ref: ${{ github.event.pull_request.head.ref }}
           repository: ${{ github.event.pull_request.head.repo.full_name }}
-        if: contains(github.event.pull_request.labels.*.name, 'safe to test')
+        if: github.event_name == 'pull_request_target'
       - uses: actions/setup-go@v5
         with:
           cache: false
@@ -149,6 +149,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+        if: github.event_name != 'pull_request_target'
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.ref }}
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+        if: github.event_name == 'pull_request_target'
       - uses: actions/setup-go@v5
         with:
           cache: false

diff --git a/Makefile b/Makefile
@@ -18,6 +18,10 @@ TAG ?= latest
 ENABLE_MULTI_PLATFORMS ?= false
 HELM_CHART_VERSION ?= v0.0.0-latest
 
+# Arguments for go test. This can be used, for example, to run specific tests via
+# `GO_TEST_EXTRA_ARGS="-run TestName/foo/etc"`.
+GO_TEST_EXTRA_ARGS ?=
+
 # This will print out the help message for contributing to the project.
 .PHONY: help
 help:
@@ -104,7 +108,7 @@ test-cel: envtest apigen
 	@for k8sVersion in $(ENVTEST_K8S_VERSIONS); do \
   		echo "Run CEL Validation on k8s $$k8sVersion"; \
         KUBEBUILDER_ASSETS="$$($(ENVTEST) use $$k8sVersion -p path)" \
-                 go test ./tests/cel-validation --tags test_cel_validation -v -count=1; \
+                 go test ./tests/cel-validation $(GO_TEST_EXTRA_ARGS) --tags test_cel_validation -v -count=1; \
     done
 
 # This runs the end-to-end tests for extproc without controller or k8s at all.
@@ -116,15 +120,15 @@ test-extproc: build.extproc
 	@$(MAKE) build.extproc_custom_router CMD_PATH_PREFIX=examples
 	@$(MAKE) build.testupstream CMD_PATH_PREFIX=tests
 	@echo "Run ExtProc test"
-	@go test ./tests/extproc/... -tags test_extproc -v -count=1
+	@go test ./tests/extproc/... $(GO_TEST_EXTRA_ARGS) -tags test_extproc -v -count=1
 
 # This runs the end-to-end tests for the controller with EnvTest.
 .PHONY: test-controller
 test-controller: envtest apigen
 	@for k8sVersion in $(ENVTEST_K8S_VERSIONS); do \
   		echo "Run Controller tests on k8s $$k8sVersion"; \
         KUBEBUILDER_ASSETS="$$($(ENVTEST) use $$k8sVersion -p path)" \
-                 go test ./tests/controller --tags test_controller -v -count=1; \
+                 go test ./tests/controller $(GO_TEST_EXTRA_ARGS) --tags test_controller -v -count=1; \
     done
 
 # This runs the end-to-end tests for the controller and extproc with a local kind cluster.
@@ -133,8 +137,9 @@ test-controller: envtest apigen
 .PHONY: test-e2e
 test-e2e: kind
 	@$(MAKE) docker-build DOCKER_BUILD_ARGS="--load"
+	@$(MAKE) docker-build.testupstream CMD_PATH_PREFIX=tests DOCKER_BUILD_ARGS="--load"
 	@echo "Run E2E tests"
-	@go test ./tests/e2e/... -tags test_e2e -v -count=1
+	@go test ./tests/e2e/... $(GO_TEST_EXTRA_ARGS) -tags test_e2e -v -count=1
 
 # This builds a binary for the given command under the internal/cmd directory.
 #
@@ -187,18 +192,23 @@ build.%:
 #
 # Example:
 # - `make docker-build.controller TAG=v1.2.3`
+#
+# To build the main functions outside cmd/ directory, set CMD_PATH_PREFIX to the directory containing the main function.
+#
+# Example:
+# - `make docker-build.extproc_custom_router CMD_PATH_PREFIX=examples`
+# - `make docker-build.testupstream CMD_PATH_PREFIX=tests`
 .PHONY: docker-build.%
+ifeq ($(ENABLE_MULTI_PLATFORMS),true)
+docker-build.%: GOARCH_LIST = amd64 arm64
+docker-build.%: PLATFORMS = --platform linux/amd64,linux/arm64
+endif
 docker-build.%:
 	$(eval COMMAND_NAME := $(subst docker-build.,,$@))
-	@if [ "$(ENABLE_MULTI_PLATFORMS)" = "true" ]; then \
-		GOARCH_LIST="amd64 arm64"; PLATFORMS="--platform linux/amd64,linux/arm64"; \
-	else \
-		GOARCH_LIST="$(shell go env GOARCH)"; PLATFORMS=""; \
-	fi
-	@$(MAKE) build.$(COMMAND_NAME) GOOS_LIST="linux"
+	@$(MAKE) build.$(COMMAND_NAME) GOOS_LIST="linux" GOARCH_LIST="$(GOARCH_LIST)"
 	docker buildx build . -t $(OCI_REGISTRY)/$(COMMAND_NAME):$(TAG) --build-arg COMMAND_NAME=$(COMMAND_NAME) $(PLATFORMS) $(DOCKER_BUILD_ARGS)
 
-# This builds docker images for all commands. All options for `docker-build.%` apply.
+# This builds docker images for all commands under cmd/ directory. All options for `docker-build.%` apply.
 #
 # Example:
 # - `make docker-build`

diff --git a/RELEASES.md b/RELEASES.md
@@ -0,0 +1,67 @@
+# Notes on Releases
+
+## Release Cycles
+
+Since Envoy AI Gateway depends on the Envoy Gateway and Envoy Proxy, we will follow the release cycle of the Envoy Gateway.
+In other words, we aim to cut the release of the Envoy AI Gateway a few days or a week after the new version of the Envoy Gateway
+is released. Therefore, the release cycle of the Envoy AI Gateway will be approximately every 2-3 months.
+
+We do not distinguish between major and minor releases. We will increment the minor version number by one for each release
+except that we will cut the v1.0.0 release when we have a first stable control plane API, i.e. the introduction of
+package `api/v1`. Until then, we will use the version number v0.3.x, v0.4.y, etc. See the [support policy](#Support-Policy) for more details.
+
+The patch version will be incremented when we have a bug fix or a minor feature addition. The end of life for the version
+will be 2 releases after the release of the version. For example, if we release the version v0.1.0, the end of life for
+the version will be when we release the version v0.3.0.
+
+The main branch will always use the latest version of the Envoy Gateway hence the latest version of the Envoy, and
+the main version will be available just like the tagged released versions in the GitHub Container Registry where
+we also host the helm chart.
+
+## Support Policy
+
+This document focuses on compatibility concerns of those using Envoy AI Gateway.
+It is important to note that the support policy is subject to change at any time. The support policy is as follows:
+
+First of all, there are four areas of compatibility that we are concerned with:
+* [Using envoyproxy/ai-gateway as a Go package](#public-go-package).
+* [Deploying the Envoy AI Gateway controller through the Kubernetes Custom Resource Definition (CRD)](#Custom-Resource-Definitions).
+* [Upgrading the Envoy AI Gateway controller](#Upgrading-the-Envoy-AI-Gateway-controller).
+* [Envoy Gateway vs Envoy AI Gateway compatibility](#Envoy-Gateway-vs-Envoy-AI-Gateway-compatibility).
+
+### Public Go package
+
+Since we do not envision this repository ending up as a transitive dependency, i.e. only used as a direct dependency such as
+in a custom control plane, etc., we assume that any consumer of the project should have the full control over the
+source code depending on the project. This allows us to declare deprecation and introduce the breaking changes
+in the version after the next one since they can migrate the code at their discretion. For example, any public API that is
+marked as deprecated in the version N will be removed in the version N+2. How users should
+migrate to the new API will be documented in the release notes if applicable, but we do not guarantee that a migration
+path will be provided.
+
+### Custom Resource Definitions
+
+The Custom Resource Definitions (CRDs) are defined in api/${version}/*.go files. The CRDs are versioned as v1alpha1, v1alpha2, etc.
+**For alpha versions**, we simply employ the same deprecation policy as the Go package. In other words, the APIs will be marked as
+deprecated in the version N and will be removed in the version N+2 but without any guarantee of migration path.
+Migration paths for alpha versions will be the best effort and will be documented in the release notes.
+**For beta versions**, it is the same as the alpha versions, but we will provide a migration path in the release notes.
+**For stable versions**, we will never break the APIs unless there is a critical security issue.
+We will provide a migration path in the release notes in case we need to break the APIs.
+
+### Upgrading the Envoy AI Gateway controller
+
+We guarantee that simply upgrading the controller will not break the existing configuration assuming there are
+no _un-migrated_ resources that include breaking changes left in the k8s API server. In other words, after the
+proper use of the API and migration path described above, the user should be able to upgrade the controller
+without any issue. However, this does mean that we do NOT guarantee that the existing configuration will work
+across more than two versions of the controller. For example, if you are using the version N of the controller,
+and you want to upgrade to the version N+2, you should first upgrade to the version N+1 while following the
+migration path if applicable, and then upgrade to the version N+2.
+
+### Envoy Gateway vs Envoy AI Gateway compatibility
+
+Since Envoy AI Gateway is built on top of Envoy Gateway, the compatibility between the two is important.
+We use the latest released version of Envoy Gateway as the base of the Envoy AI Gateway when we release a new version.
+Since Envoy Gateway is a stable project and supposed to work across versions, we do not expect any compatibility issue
+as long as the Envoy Gateway version is also up-to-date prior to the upgrade of the Envoy AI Gateway.
diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go
@@ -42,9 +42,9 @@ type AIGatewayRouteList struct {
 type AIGatewayRouteSpec struct {
 	// TargetRefs are the names of the Gateway resources this AIGatewayRoute is being attached to.
 	//
-	// +optional
+	// +kubebuilder:validation:MinItems=1
 	// +kubebuilder:validation:MaxItems=128
-	TargetRefs []gwapiv1a2.LocalPolicyTargetReferenceWithSectionName `json:"targetRefs,omitempty"`
+	TargetRefs []gwapiv1a2.LocalPolicyTargetReferenceWithSectionName `json:"targetRefs"`
 	// APISchema specifies the API schema of the input that the target Gateway(s) will receive.
 	// Based on this schema, the ai-gateway will perform the necessary transformation to the
 	// output schema specified in the selected AIServiceBackend during the routing process.
@@ -223,7 +223,7 @@ type AIServiceBackendSpec struct {
 	// This is required to be set.
 	//
 	// +kubebuilder:validation:Required
-	BackendRef egv1a1.BackendRef `json:"backendRef"`
+	BackendRef gwapiv1.BackendObjectReference `json:"backendRef"`
 
 	// BackendSecurityPolicyRef is the name of the BackendSecurityPolicy resources this backend
 	// is being attached to.

diff --git a/internal/apischema/openai/openai.go b/internal/apischema/openai/openai.go
@@ -12,6 +12,7 @@ import (
 // Chat message role defined by the OpenAI API.
 const (
 	ChatMessageRoleSystem    = "system"
+	ChatMessageRoleDeveloper = "developer"
 	ChatMessageRoleUser      = "user"
 	ChatMessageRoleAssistant = "assistant"
 	ChatMessageRoleFunction  = "function"
@@ -225,6 +226,13 @@ func (c *ChatCompletionMessageParamUnion) UnmarshalJSON(data []byte) error {
 		}
 		c.Value = systemMessage
 		c.Type = ChatMessageRoleSystem
+	case ChatMessageRoleDeveloper:
+		var developerMessage ChatCompletionDeveloperMessageParam
+		if err := json.Unmarshal(data, &developerMessage); err != nil {
+			return err
+		}
+		c.Value = developerMessage
+		c.Type = ChatMessageRoleDeveloper
 	case ChatMessageRoleTool:
 		var toolMessage ChatCompletionToolMessageParam
 		if err := json.Unmarshal(data, &toolMessage); err != nil {
@@ -263,6 +271,19 @@ type ChatCompletionSystemMessageParam struct {
 	Name string `json:"name,omitempty"`
 }
 
+// ChatCompletionDeveloperMessageParam represents developer-provided instructions that the model should follow,
+// regardless of messages sent by the user. With o1 models and newer, `developer` messages
+// replace the previous `system` messages.
+type ChatCompletionDeveloperMessageParam struct {
+	// The contents of the developer message.
+	Content StringOrArray `json:"content"`
+	// The role of the messages author, in this case `developer`.
+	Role string `json:"role"`
+	// An optional name for the participant. Provides the model information to
+	// differentiate between participants of the same role.
+	Name string `json:"name,omitempty"`
+}
+
 type ChatCompletionToolMessageParam struct {
 	// The contents of the tool message.
 	Content StringOrArray `json:"content"`

diff --git a/internal/apischema/openai/openai_test.go b/internal/apischema/openai/openai_test.go
@@ -20,6 +20,7 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
 			in: []byte(`{"model": "gpu-o4",
                         "messages": [
                          {"role": "system", "content": "you are a helpful assistant"},
+                         {"role": "developer", "content": "you are a helpful dev assistant"},
                          {"role": "user", "content": "what do you see in this image"}]}`),
 			out: &ChatCompletionRequest{
 				Model: "gpu-o4",
@@ -33,6 +34,15 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
 						},
 						Type: ChatMessageRoleSystem,
 					},
+					{
+						Value: ChatCompletionDeveloperMessageParam{
+							Role: ChatMessageRoleDeveloper,
+							Content: StringOrArray{
+								Value: "you are a helpful dev assistant",
+							},
+						},
+						Type: ChatMessageRoleDeveloper,
+					},
 					{
 						Value: ChatCompletionUserMessageParam{
 							Role: ChatMessageRoleUser,
@@ -50,6 +60,7 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
 			in: []byte(`{"model": "gpu-o4",
                         "messages": [
                          {"role": "system", "content": [{"text": "you are a helpful assistant", "type": "text"}]},
+                         {"role": "developer", "content": [{"text": "you are a helpful dev assistant", "type": "text"}]},
                          {"role": "user", "content": [{"text": "what do you see in this image", "type": "text"}]}]}`),
 			out: &ChatCompletionRequest{
 				Model: "gpu-o4",
@@ -68,6 +79,20 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
 						},
 						Type: ChatMessageRoleSystem,
 					},
+					{
+						Value: ChatCompletionDeveloperMessageParam{
+							Role: ChatMessageRoleDeveloper,
+							Content: StringOrArray{
+								Value: []ChatCompletionContentPartTextParam{
+									{
+										Text: "you are a helpful dev assistant",
+										Type: string(openai.ChatCompletionContentPartTextTypeText),
+									},
+								},
+							},
+						},
+						Type: ChatMessageRoleDeveloper,
+					},
 					{
 						Value: ChatCompletionUserMessageParam{
 							Role: ChatMessageRoleUser,

diff --git a/internal/controller/ai_gateway_route.go b/internal/controller/ai_gateway_route.go
@@ -46,7 +46,6 @@ type aiGatewayRouteController struct {
 	client              client.Client
 	kube                kubernetes.Interface
 	logger              logr.Logger
-	logLevel            string
 	defaultExtProcImage string
 	eventChan           chan ConfigSinkEvent
 }
@@ -59,7 +58,7 @@ func NewAIGatewayRouteController(
 	return &aiGatewayRouteController{
 		client:              client,
 		kube:                kube,
-		logger:              logger.WithName("ai-gateway-route-controller"),
+		logger:              logger.WithName("eaig-route-controller"),
 		defaultExtProcImage: options.ExtProcImage,
 		eventChan:           ch,
 	}
@@ -204,7 +203,7 @@ func (c *aiGatewayRouteController) reconcileExtProcDeployment(ctx context.Contex
 									Ports:           []corev1.ContainerPort{{Name: "grpc", ContainerPort: 1063}},
 									Args: []string{
 										"-configPath", "/etc/ai-gateway/extproc/" + expProcConfigFileName,
-										"-logLevel", c.logLevel,
+										"-logLevel", "info", // TODO: this should be configurable via FilterConfig API.
 									},
 									VolumeMounts: []corev1.VolumeMount{
 										{Name: "config", MountPath: "/etc/ai-gateway/extproc"},
@@ -268,7 +267,7 @@ func (c *aiGatewayRouteController) reconcileExtProcDeployment(ctx context.Contex
 }
 
 func extProcName(route *aigv1a1.AIGatewayRoute) string {
-	return fmt.Sprintf("ai-gateway-ai-gateway-route-extproc-%s", route.Name)
+	return fmt.Sprintf("eaig-route-extproc-%s", route.Name)
 }
 
 func ownerReferenceForAIGatewayRoute(aiGatewayRoute *aigv1a1.AIGatewayRoute) []metav1.OwnerReference {

diff --git a/internal/controller/ai_gateway_route_test.go b/internal/controller/ai_gateway_route_test.go
@@ -27,7 +27,7 @@ func Test_extProcName(t *testing.T) {
 			Name: "myroute",
 		},
 	})
-	require.Equal(t, "ai-gateway-ai-gateway-route-extproc-myroute", actual)
+	require.Equal(t, "eaig-route-extproc-myroute", actual)
 }
 
 func TestAIGatewayRouteController_ensuresExtProcConfigMapExists(t *testing.T) {