Skip to content

Commit

Permalink
Merge branch 'main' into token-useage
Browse files Browse the repository at this point in the history
  • Loading branch information
yuzisun authored Jan 18, 2025
2 parents 5843f1e + 85fc9d3 commit 9a4ad44
Show file tree
Hide file tree
Showing 31 changed files with 695 additions and 108 deletions.
10 changes: 8 additions & 2 deletions .github/workflows/commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
if: github.event.pull_request.head.repo.fork == false
if: github.event_name != 'pull_request_target'
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
if: contains(github.event.pull_request.labels.*.name, 'safe to test')
if: github.event_name == 'pull_request_target'
- uses: actions/setup-go@v5
with:
cache: false
Expand Down Expand Up @@ -149,6 +149,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
if: github.event_name != 'pull_request_target'
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
if: github.event_name == 'pull_request_target'
- uses: actions/setup-go@v5
with:
cache: false
Expand Down
32 changes: 21 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ TAG ?= latest
ENABLE_MULTI_PLATFORMS ?= false
HELM_CHART_VERSION ?= v0.0.0-latest

# Arguments for go test. This can be used, for example, to run specific tests via
# `GO_TEST_EXTRA_ARGS="-run TestName/foo/etc"`.
GO_TEST_EXTRA_ARGS ?=

# This will print out the help message for contributing to the project.
.PHONY: help
help:
Expand Down Expand Up @@ -104,7 +108,7 @@ test-cel: envtest apigen
@for k8sVersion in $(ENVTEST_K8S_VERSIONS); do \
echo "Run CEL Validation on k8s $$k8sVersion"; \
KUBEBUILDER_ASSETS="$$($(ENVTEST) use $$k8sVersion -p path)" \
go test ./tests/cel-validation --tags test_cel_validation -v -count=1; \
go test ./tests/cel-validation $(GO_TEST_EXTRA_ARGS) --tags test_cel_validation -v -count=1; \
done

# This runs the end-to-end tests for extproc without controller or k8s at all.
Expand All @@ -116,15 +120,15 @@ test-extproc: build.extproc
@$(MAKE) build.extproc_custom_router CMD_PATH_PREFIX=examples
@$(MAKE) build.testupstream CMD_PATH_PREFIX=tests
@echo "Run ExtProc test"
@go test ./tests/extproc/... -tags test_extproc -v -count=1
@go test ./tests/extproc/... $(GO_TEST_EXTRA_ARGS) -tags test_extproc -v -count=1

# This runs the end-to-end tests for the controller with EnvTest.
.PHONY: test-controller
test-controller: envtest apigen
@for k8sVersion in $(ENVTEST_K8S_VERSIONS); do \
echo "Run Controller tests on k8s $$k8sVersion"; \
KUBEBUILDER_ASSETS="$$($(ENVTEST) use $$k8sVersion -p path)" \
go test ./tests/controller --tags test_controller -v -count=1; \
go test ./tests/controller $(GO_TEST_EXTRA_ARGS) --tags test_controller -v -count=1; \
done

# This runs the end-to-end tests for the controller and extproc with a local kind cluster.
Expand All @@ -133,8 +137,9 @@ test-controller: envtest apigen
.PHONY: test-e2e
test-e2e: kind
@$(MAKE) docker-build DOCKER_BUILD_ARGS="--load"
@$(MAKE) docker-build.testupstream CMD_PATH_PREFIX=tests DOCKER_BUILD_ARGS="--load"
@echo "Run E2E tests"
@go test ./tests/e2e/... -tags test_e2e -v -count=1
@go test ./tests/e2e/... $(GO_TEST_EXTRA_ARGS) -tags test_e2e -v -count=1

# This builds a binary for the given command under the internal/cmd directory.
#
Expand Down Expand Up @@ -187,18 +192,23 @@ build.%:
#
# Example:
# - `make docker-build.controller TAG=v1.2.3`
#
# To build the main functions outside cmd/ directory, set CMD_PATH_PREFIX to the directory containing the main function.
#
# Example:
# - `make docker-build.extproc_custom_router CMD_PATH_PREFIX=examples`
# - `make docker-build.testupstream CMD_PATH_PREFIX=tests`
.PHONY: docker-build.%
ifeq ($(ENABLE_MULTI_PLATFORMS),true)
docker-build.%: GOARCH_LIST = amd64 arm64
docker-build.%: PLATFORMS = --platform linux/amd64,linux/arm64
endif
docker-build.%:
$(eval COMMAND_NAME := $(subst docker-build.,,$@))
@if [ "$(ENABLE_MULTI_PLATFORMS)" = "true" ]; then \
GOARCH_LIST="amd64 arm64"; PLATFORMS="--platform linux/amd64,linux/arm64"; \
else \
GOARCH_LIST="$(shell go env GOARCH)"; PLATFORMS=""; \
fi
@$(MAKE) build.$(COMMAND_NAME) GOOS_LIST="linux"
@$(MAKE) build.$(COMMAND_NAME) GOOS_LIST="linux" GOARCH_LIST="$(GOARCH_LIST)"
docker buildx build . -t $(OCI_REGISTRY)/$(COMMAND_NAME):$(TAG) --build-arg COMMAND_NAME=$(COMMAND_NAME) $(PLATFORMS) $(DOCKER_BUILD_ARGS)

# This builds docker images for all commands. All options for `docker-build.%` apply.
# This builds docker images for all commands under cmd/ directory. All options for `docker-build.%` apply.
#
# Example:
# - `make docker-build`
Expand Down
67 changes: 67 additions & 0 deletions RELEASES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Notes on Releases

## Release Cycles

Since Envoy AI Gateway depends on the Envoy Gateway and Envoy Proxy, we will follow the release cycle of the Envoy Gateway.
In other words, we aim to cut the release of the Envoy AI Gateway a few days or a week after the new version of the Envoy Gateway
is released. Therefore, the release cycle of the Envoy AI Gateway will be approximately every 2-3 months.

We do not distinguish between major and minor releases. We will increment the minor version number by one for each release
except that we will cut the v1.0.0 release when we have a first stable control plane API, i.e. the introduction of
package `api/v1`. Until then, we will use the version number v0.3.x, v0.4.y, etc. See the [support policy](#support-policy) for more details.

The patch version will be incremented when we have a bug fix or a minor feature addition. The end of life for the version
will be 2 releases after the release of the version. For example, if we release the version v0.1.0, the end of life for
the version will be when we release the version v0.3.0.

The main branch will always use the latest version of the Envoy Gateway hence the latest version of the Envoy, and
the main version will be available just like the tagged released versions in the GitHub Container Registry where
we also host the helm chart.

## Support Policy

This document focuses on compatibility concerns of those using Envoy AI Gateway.
It is important to note that the support policy is subject to change at any time. The support policy is as follows:

First of all, there are four areas of compatibility that we are concerned with:
* [Using envoyproxy/ai-gateway as a Go package](#public-go-package).
* [Deploying the Envoy AI Gateway controller through the Kubernetes Custom Resource Definition (CRD)](#custom-resource-definitions).
* [Upgrading the Envoy AI Gateway controller](#upgrading-the-envoy-ai-gateway-controller).
* [Envoy Gateway vs Envoy AI Gateway compatibility](#envoy-gateway-vs-envoy-ai-gateway-compatibility).

### Public Go package

Since we do not envision this repository ending up as a transitive dependency, i.e. we expect it to be used only as a direct
dependency such as in a custom control plane, we assume that any consumer of the project has full control over the
source code depending on the project. This allows us to declare deprecation and introduce the breaking changes
in the version after the next one since they can migrate the code at their discretion. For example, any public API that is
marked as deprecated in the version N will be removed in the version N+2. How users should migrate to the new API
will be documented in the release notes if applicable, but we do not guarantee that a migration
path will be provided.

### Custom Resource Definitions

The Custom Resource Definitions (CRDs) are defined in api/${version}/*.go files. The CRDs are versioned as v1alpha1, v1alpha2, etc.
**For alpha versions**, we simply employ the same deprecation policy as the Go package. In other words, the APIs will be marked as
deprecated in the version N and will be removed in the version N+2 but without any guarantee of migration path.
Migration paths for alpha versions will be best effort and will be documented in the release notes.
**For beta versions**, it is the same as the alpha versions, but we will provide a migration path in the release notes.
**For stable versions**, we will never break the APIs unless there is a critical security issue.
We will provide a migration path in the release notes in case we need to break the APIs.

### Upgrading the Envoy AI Gateway controller

We guarantee that simply upgrading the controller will not break the existing configuration assuming there are
no _un-migrated_ resources containing breaking changes left in the k8s API server. In other words, after the
proper use of the API and migration path described above, the user should be able to upgrade the controller
without any issue. However, this does mean that we do NOT guarantee that the existing configuration will work
across more than two versions of the controller. For example, if you are using the version N of the controller,
and you want to upgrade to the version N+2, you should first upgrade to the version N+1 while following the
migration path if applicable, and then upgrade to the version N+2.

### Envoy Gateway vs Envoy AI Gateway compatibility

Since Envoy AI Gateway is built on top of Envoy Gateway, the compatibility between the two is important.
We use the latest released version of Envoy Gateway as the base of the Envoy AI Gateway when we release a new version.
Since Envoy Gateway is a stable project and is supposed to work across versions, we do not expect any compatibility issues
as long as the Envoy Gateway version is also up-to-date prior to the upgrade of the Envoy AI Gateway.
6 changes: 3 additions & 3 deletions api/v1alpha1/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ type AIGatewayRouteList struct {
type AIGatewayRouteSpec struct {
// TargetRefs are the names of the Gateway resources this AIGatewayRoute is being attached to.
//
// +optional
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=128
TargetRefs []gwapiv1a2.LocalPolicyTargetReferenceWithSectionName `json:"targetRefs,omitempty"`
TargetRefs []gwapiv1a2.LocalPolicyTargetReferenceWithSectionName `json:"targetRefs"`
// APISchema specifies the API schema of the input that the target Gateway(s) will receive.
// Based on this schema, the ai-gateway will perform the necessary transformation to the
// output schema specified in the selected AIServiceBackend during the routing process.
Expand Down Expand Up @@ -223,7 +223,7 @@ type AIServiceBackendSpec struct {
// This is required to be set.
//
// +kubebuilder:validation:Required
BackendRef egv1a1.BackendRef `json:"backendRef"`
BackendRef gwapiv1.BackendObjectReference `json:"backendRef"`

// BackendSecurityPolicyRef is the name of the BackendSecurityPolicy resources this backend
// is being attached to.
Expand Down
21 changes: 21 additions & 0 deletions internal/apischema/openai/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
// Chat message role defined by the OpenAI API.
const (
ChatMessageRoleSystem = "system"
ChatMessageRoleDeveloper = "developer"
ChatMessageRoleUser = "user"
ChatMessageRoleAssistant = "assistant"
ChatMessageRoleFunction = "function"
Expand Down Expand Up @@ -225,6 +226,13 @@ func (c *ChatCompletionMessageParamUnion) UnmarshalJSON(data []byte) error {
}
c.Value = systemMessage
c.Type = ChatMessageRoleSystem
case ChatMessageRoleDeveloper:
var developerMessage ChatCompletionDeveloperMessageParam
if err := json.Unmarshal(data, &developerMessage); err != nil {
return err
}
c.Value = developerMessage
c.Type = ChatMessageRoleDeveloper
case ChatMessageRoleTool:
var toolMessage ChatCompletionToolMessageParam
if err := json.Unmarshal(data, &toolMessage); err != nil {
Expand Down Expand Up @@ -263,6 +271,19 @@ type ChatCompletionSystemMessageParam struct {
Name string `json:"name,omitempty"`
}

// ChatCompletionDeveloperMessageParam carries developer-provided instructions that the model
// should follow, regardless of messages sent by the user. With o1 models and newer, `developer`
// messages are used for this purpose instead of `system` messages
// (see ChatCompletionSystemMessageParam).
type ChatCompletionDeveloperMessageParam struct {
// The contents of the developer message.
Content StringOrArray `json:"content"`
// The role of the message's author, in this case `developer`.
Role string `json:"role"`
// An optional name for the participant. Provides the model information to
// differentiate between participants of the same role. Omitted from the
// JSON output when empty.
Name string `json:"name,omitempty"`
}

type ChatCompletionToolMessageParam struct {
// The contents of the tool message.
Content StringOrArray `json:"content"`
Expand Down
25 changes: 25 additions & 0 deletions internal/apischema/openai/openai_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
in: []byte(`{"model": "gpu-o4",
"messages": [
{"role": "system", "content": "you are a helpful assistant"},
{"role": "developer", "content": "you are a helpful dev assistant"},
{"role": "user", "content": "what do you see in this image"}]}`),
out: &ChatCompletionRequest{
Model: "gpu-o4",
Expand All @@ -33,6 +34,15 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
},
Type: ChatMessageRoleSystem,
},
{
Value: ChatCompletionDeveloperMessageParam{
Role: ChatMessageRoleDeveloper,
Content: StringOrArray{
Value: "you are a helpful dev assistant",
},
},
Type: ChatMessageRoleDeveloper,
},
{
Value: ChatCompletionUserMessageParam{
Role: ChatMessageRoleUser,
Expand All @@ -50,6 +60,7 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
in: []byte(`{"model": "gpu-o4",
"messages": [
{"role": "system", "content": [{"text": "you are a helpful assistant", "type": "text"}]},
{"role": "developer", "content": [{"text": "you are a helpful dev assistant", "type": "text"}]},
{"role": "user", "content": [{"text": "what do you see in this image", "type": "text"}]}]}`),
out: &ChatCompletionRequest{
Model: "gpu-o4",
Expand All @@ -68,6 +79,20 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
},
Type: ChatMessageRoleSystem,
},
{
Value: ChatCompletionDeveloperMessageParam{
Role: ChatMessageRoleDeveloper,
Content: StringOrArray{
Value: []ChatCompletionContentPartTextParam{
{
Text: "you are a helpful dev assistant",
Type: string(openai.ChatCompletionContentPartTextTypeText),
},
},
},
},
Type: ChatMessageRoleDeveloper,
},
{
Value: ChatCompletionUserMessageParam{
Role: ChatMessageRoleUser,
Expand Down
7 changes: 3 additions & 4 deletions internal/controller/ai_gateway_route.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ type aiGatewayRouteController struct {
client client.Client
kube kubernetes.Interface
logger logr.Logger
logLevel string
defaultExtProcImage string
eventChan chan ConfigSinkEvent
}
Expand All @@ -59,7 +58,7 @@ func NewAIGatewayRouteController(
return &aiGatewayRouteController{
client: client,
kube: kube,
logger: logger.WithName("ai-gateway-route-controller"),
logger: logger.WithName("eaig-route-controller"),
defaultExtProcImage: options.ExtProcImage,
eventChan: ch,
}
Expand Down Expand Up @@ -204,7 +203,7 @@ func (c *aiGatewayRouteController) reconcileExtProcDeployment(ctx context.Contex
Ports: []corev1.ContainerPort{{Name: "grpc", ContainerPort: 1063}},
Args: []string{
"-configPath", "/etc/ai-gateway/extproc/" + expProcConfigFileName,
"-logLevel", c.logLevel,
"-logLevel", "info", // TODO: this should be configurable via FilterConfig API.
},
VolumeMounts: []corev1.VolumeMount{
{Name: "config", MountPath: "/etc/ai-gateway/extproc"},
Expand Down Expand Up @@ -268,7 +267,7 @@ func (c *aiGatewayRouteController) reconcileExtProcDeployment(ctx context.Contex
}

func extProcName(route *aigv1a1.AIGatewayRoute) string {
return fmt.Sprintf("ai-gateway-ai-gateway-route-extproc-%s", route.Name)
return fmt.Sprintf("eaig-route-extproc-%s", route.Name)
}

func ownerReferenceForAIGatewayRoute(aiGatewayRoute *aigv1a1.AIGatewayRoute) []metav1.OwnerReference {
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/ai_gateway_route_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func Test_extProcName(t *testing.T) {
Name: "myroute",
},
})
require.Equal(t, "ai-gateway-ai-gateway-route-extproc-myroute", actual)
require.Equal(t, "eaig-route-extproc-myroute", actual)
}

func TestAIGatewayRouteController_ensuresExtProcConfigMapExists(t *testing.T) {
Expand Down
Loading

0 comments on commit 9a4ad44

Please sign in to comment.