
Commit 47a125e
Adds deployment configuration for extproc (#98)
Fixes #82

---------

Signed-off-by: Takeshi Yoneda <[email protected]>
mathetake authored Jan 16, 2025
1 parent 9079fef commit 47a125e
Showing 6 changed files with 425 additions and 38 deletions.
58 changes: 57 additions & 1 deletion api/v1alpha1/api.go
@@ -2,6 +2,7 @@ package v1alpha1

import (
egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
@@ -57,7 +58,8 @@ type LLMRouteSpec struct {
// Each rule is a subset of the HTTPRoute in the Gateway API (https://gateway-api.sigs.k8s.io/api-types/httproute/).
//
// AI Gateway controller will generate a HTTPRoute based on the configuration given here with the additional
// modifications to achieve the necessary jobs, notably inserting the AI Gateway external processor filter.
// modifications to perform the necessary jobs, notably inserting the AI Gateway filter responsible for
// transforming the request and response, etc.
//
// In the matching conditions of the LLMRouteRule, the `x-envoy-ai-gateway-model` header is available
// for describing routing behavior based on the model name. The model name is extracted
@@ -69,6 +71,14 @@ type LLMRouteSpec struct {
// +kubebuilder:validation:Required
// +kubebuilder:validation:MaxItems=128
Rules []LLMRouteRule `json:"rules"`
// FilterConfig is the configuration for the AI Gateway filter inserted in the generated HTTPRoute.
//
// An AI Gateway filter is responsible for transforming the request and response,
// as well as for the routing behavior based on the model name extracted from the request content, etc.
//
// Currently, the filter is implemented only as an external process filter, which might be
// extended to other filter types in the future. See https://github.com/envoyproxy/ai-gateway/issues/90
FilterConfig *LLMRouteFilterConfig `json:"filterConfig,omitempty"`
}
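To make the header-based matching described above concrete, here is a minimal sketch written as if inside the api/v1alpha1 package. It is not part of this commit, and the `Matches` field name on `LLMRouteRule` is an assumption, since only `LLMRouteRuleMatch.Headers` appears in this diff.

```go
// buildModelRule sketches a rule that applies only to requests whose
// x-envoy-ai-gateway-model header equals the given model name.
// NOTE: the Matches field name is an assumption; only LLMRouteRuleMatch.Headers
// is visible in this diff.
func buildModelRule(model string) LLMRouteRule {
	return LLMRouteRule{
		Matches: []LLMRouteRuleMatch{{
			Headers: []gwapiv1.HTTPHeaderMatch{{
				Name:  "x-envoy-ai-gateway-model", // set from the model name in the request content
				Value: model,
			}},
		}},
	}
}
```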

// LLMRouteRule is a rule that defines the routing behavior of the LLMRoute.
@@ -122,6 +132,52 @@ type LLMRouteRuleMatch struct {
Headers []gwapiv1.HTTPHeaderMatch `json:"headers,omitempty"`
}

type LLMRouteFilterConfig struct {
// Type specifies the type of the filter configuration.
//
// Currently, only ExternalProcess is supported, and it is the default.
//
// +kubebuilder:default=ExternalProcess
Type LLMRouteFilterConfigType `json:"type"`

// ExternalProcess is the configuration for the external process filter.
// This is optional; if not set, the default values of the Deployment spec are used.
//
// +optional
ExternalProcess *LLMRouteFilterConfigExternalProcess `json:"externalProcess,omitempty"`
}

// LLMRouteFilterConfigType specifies the type of the filter configuration.
//
// +kubebuilder:validation:Enum=ExternalProcess;DynamicModule
type LLMRouteFilterConfigType string

const (
LLMRouteFilterConfigTypeExternalProcess LLMRouteFilterConfigType = "ExternalProcess"
LLMRouteFilterConfigTypeDynamicModule LLMRouteFilterConfigType = "DynamicModule" // Reserved for https://github.com/envoyproxy/ai-gateway/issues/90
)

type LLMRouteFilterConfigExternalProcess struct {
// Replicas is the number of desired pods of the external process deployment.
//
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// Resources required by the external process container.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
//
// +optional
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
// Image is the image of the external process container.
//
// This defaults to the ghcr.io/envoyproxy/ai-gateway/extproc:${VERSION} image where
// ${VERSION} is the version of the Envoy AI Gateway controller.
//
// +optional
Image string `json:"image,omitempty"`
// TODO: maybe add an option to not deploy the external process filter and let the user deploy it manually?
// Not sure if it is worth it, as we are migrating to dynamic modules.
}
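For illustration, a minimal sketch of the image-defaulting rule described above, written as if inside the same package. `resolveExtProcImage` is a hypothetical helper, not part of this commit; the actual override is done by `applyExtProcDeploymentConfigUpdate` in `internal/controller/llmroute.go` below.

```go
// resolveExtProcImage is a hypothetical helper mirroring the defaulting
// semantics: a user-supplied image takes precedence; otherwise the
// controller's default image is used.
func resolveExtProcImage(defaultImage string, fc *LLMRouteFilterConfig) string {
	if fc != nil && fc.ExternalProcess != nil && fc.ExternalProcess.Image != "" {
		return fc.ExternalProcess.Image
	}
	return defaultImage // e.g. ghcr.io/envoyproxy/ai-gateway/extproc:${VERSION}
}
```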

// +kubebuilder:object:root=true

// LLMBackend is a resource that represents a single backend for LLMRoute.
51 changes: 51 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

49 changes: 34 additions & 15 deletions internal/controller/llmroute.go
@@ -43,12 +43,12 @@ func llmRouteIndexFunc(o client.Object) []string {
//
// This handles the LLMRoute resource and creates the necessary resources for the external process.
type llmRouteController struct {
client client.Client
kube kubernetes.Interface
logger logr.Logger
logLevel string
extProcImage string
eventChan chan ConfigSinkEvent
client client.Client
kube kubernetes.Interface
logger logr.Logger
logLevel string
defaultExtProcImage string
eventChan chan ConfigSinkEvent
}

// NewLLMRouteController creates a new reconcile.TypedReconciler[reconcile.Request] for the LLMRoute resource.
@@ -57,11 +57,11 @@ func NewLLMRouteController(
options Options, ch chan ConfigSinkEvent,
) reconcile.TypedReconciler[reconcile.Request] {
return &llmRouteController{
client: client,
kube: kube,
logger: logger.WithName("llmroute-controller"),
extProcImage: options.ExtProcImage,
eventChan: ch,
client: client,
kube: kube,
logger: logger.WithName("llmroute-controller"),
defaultExtProcImage: options.ExtProcImage,
eventChan: ch,
}
}

@@ -199,7 +199,7 @@ func (c *llmRouteController) reconcileExtProcDeployment(ctx context.Context, llm
Containers: []corev1.Container{
{
Name: name,
Image: c.extProcImage,
Image: c.defaultExtProcImage,
ImagePullPolicy: corev1.PullIfNotPresent,
Ports: []corev1.ContainerPort{{Name: "grpc", ContainerPort: 1063}},
Args: []string{
@@ -225,6 +225,7 @@ func (c *llmRouteController) reconcileExtProcDeployment(ctx context.Context, llm
},
},
}
applyExtProcDeploymentConfigUpdate(&deployment.Spec, llmRoute.Spec.FilterConfig)
_, err = c.kube.AppsV1().Deployments(llmRoute.Namespace).Create(ctx, deployment, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("failed to create deployment: %w", err)
@@ -233,11 +234,13 @@ func (c *llmRouteController) reconcileExtProcDeployment(ctx context.Context, llm
} else {
return fmt.Errorf("failed to get deployment: %w", err)
}
} else {
applyExtProcDeploymentConfigUpdate(&deployment.Spec, llmRoute.Spec.FilterConfig)
if _, err = c.kube.AppsV1().Deployments(llmRoute.Namespace).Update(ctx, deployment, metav1.UpdateOptions{}); err != nil {
return fmt.Errorf("failed to update deployment: %w", err)
}
}

// TODO: reconcile the deployment spec like replicas etc once we have support for it at the CRD level.
_ = deployment

// This is static, so we don't need to update it.
service := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
@@ -276,3 +279,19 @@ func ownerReferenceForLLMRoute(llmRoute *aigv1a1.LLMRoute) []metav1.OwnerReferen
UID: llmRoute.UID,
}}
}

func applyExtProcDeploymentConfigUpdate(d *appsv1.DeploymentSpec, filterConfig *aigv1a1.LLMRouteFilterConfig) {
if filterConfig == nil || filterConfig.ExternalProcess == nil {
return
}
extProc := filterConfig.ExternalProcess
if resource := extProc.Resources; resource != nil {
d.Template.Spec.Containers[0].Resources = *resource
}
if replica := extProc.Replicas; replica != nil {
d.Replicas = replica
}
if image := extProc.Image; image != "" {
d.Template.Spec.Containers[0].Image = image
}
}
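A hedged usage sketch of the helper above, written as if inside internal/controller (the image names are placeholders): only fields set in `FilterConfig.ExternalProcess` override the defaults, so setting `Image` alone leaves `Replicas` untouched.

```go
func exampleApplyOverride() {
	// A spec resembling the controller's default extproc deployment.
	spec := appsv1.DeploymentSpec{
		Template: corev1.PodTemplateSpec{Spec: corev1.PodSpec{
			Containers: []corev1.Container{{Image: "ghcr.io/envoyproxy/ai-gateway/extproc:v0.0.0"}},
		}},
	}
	applyExtProcDeploymentConfigUpdate(&spec, &aigv1a1.LLMRouteFilterConfig{
		ExternalProcess: &aigv1a1.LLMRouteFilterConfigExternalProcess{
			Image: "my-registry/extproc:custom", // placeholder image
		},
	})
	// Now spec.Template.Spec.Containers[0].Image == "my-registry/extproc:custom".
	// spec.Replicas stays nil, so Kubernetes defaults the Deployment to 1 replica.
}
```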
74 changes: 71 additions & 3 deletions internal/controller/llmroute_test.go
@@ -6,9 +6,13 @@ import (

egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
"github.com/stretchr/testify/require"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
fake2 "k8s.io/client-go/kubernetes/fake"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
@@ -53,22 +57,50 @@ func TestLLMRouteController_reconcileExtProcDeployment(t *testing.T) {
c.kube = fake2.NewClientset()

ownerRef := []metav1.OwnerReference{{APIVersion: "v1", Kind: "Kind", Name: "Name"}}
llmRoute := &aigv1a1.LLMRoute{ObjectMeta: metav1.ObjectMeta{Name: "myroute", Namespace: "default"}}
llmRoute := &aigv1a1.LLMRoute{
ObjectMeta: metav1.ObjectMeta{Name: "myroute", Namespace: "default"},
Spec: aigv1a1.LLMRouteSpec{
FilterConfig: &aigv1a1.LLMRouteFilterConfig{
Type: aigv1a1.LLMRouteFilterConfigTypeExternalProcess,
ExternalProcess: &aigv1a1.LLMRouteFilterConfigExternalProcess{
Replicas: ptr.To[int32](123),
Resources: &corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("200m"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
},
},
},
},
},
}

err := c.reconcileExtProcDeployment(context.Background(), llmRoute, ownerRef)
require.NoError(t, err)

deployment, err := c.kube.AppsV1().Deployments("default").Get(context.Background(), extProcName(llmRoute), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, extProcName(llmRoute), deployment.Name)

require.Equal(t, int32(123), *deployment.Spec.Replicas)
require.Equal(t, ownerRef, deployment.OwnerReferences)
require.Equal(t, corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("200m"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
},
}, deployment.Spec.Template.Spec.Containers[0].Resources)
service, err := c.kube.CoreV1().Services("default").Get(context.Background(), extProcName(llmRoute), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, extProcName(llmRoute), service.Name)

// Doing it again should not fail.
// Doing it again should not fail, and should update the deployment.
llmRoute.Spec.FilterConfig.ExternalProcess.Replicas = ptr.To[int32](456)
err = c.reconcileExtProcDeployment(context.Background(), llmRoute, ownerRef)
require.NoError(t, err)
// Check the deployment is updated.
deployment, err = c.kube.AppsV1().Deployments("default").Get(context.Background(), extProcName(llmRoute), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, int32(456), *deployment.Spec.Replicas)
}

func TestLLMRouteController_reconcileExtProcExtensionPolicy(t *testing.T) {
@@ -115,6 +147,42 @@ func TestLLMRouteController_reconcileExtProcExtensionPolicy(t *testing.T) {
}
}

func Test_applyExtProcDeploymentConfigUpdate(t *testing.T) {
dep := &appsv1.DeploymentSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
}
t.Run("not panic", func(t *testing.T) {
applyExtProcDeploymentConfigUpdate(dep, nil)
applyExtProcDeploymentConfigUpdate(dep, &aigv1a1.LLMRouteFilterConfig{})
applyExtProcDeploymentConfigUpdate(dep, &aigv1a1.LLMRouteFilterConfig{
ExternalProcess: &aigv1a1.LLMRouteFilterConfigExternalProcess{},
})
})
t.Run("update", func(t *testing.T) {
req := corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("200m"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
},
}
applyExtProcDeploymentConfigUpdate(dep, &aigv1a1.LLMRouteFilterConfig{
ExternalProcess: &aigv1a1.LLMRouteFilterConfigExternalProcess{
Resources: &req,
Replicas: ptr.To[int32](123),
Image: "some-image",
},
},
)
require.Equal(t, req, dep.Template.Spec.Containers[0].Resources)
require.Equal(t, int32(123), *dep.Replicas)
require.Equal(t, "some-image", dep.Template.Spec.Containers[0].Image)
})
}

func Test_llmRouteIndexFunc(t *testing.T) {
scheme := runtime.NewScheme()
require.NoError(t, aigv1a1.AddToScheme(scheme))
(Diff for the remaining 2 changed files did not load.)
