
Commit 47a125e
Adds deployment configuration for extproc (#98)
Fixes #82

---------

Signed-off-by: Takeshi Yoneda <[email protected]>
mathetake authored Jan 16, 2025
1 parent 9079fef commit 47a125e
Showing 6 changed files with 425 additions and 38 deletions.
58 changes: 57 additions & 1 deletion api/v1alpha1/api.go
@@ -2,6 +2,7 @@ package v1alpha1

import (
egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
@@ -57,7 +58,8 @@ type LLMRouteSpec struct {
// Each rule is a subset of the HTTPRoute in the Gateway API (https://gateway-api.sigs.k8s.io/api-types/httproute/).
//
// AI Gateway controller will generate a HTTPRoute based on the configuration given here with the additional
// modifications to achieve the necessary jobs, notably inserting the AI Gateway external processor filter.
// modifications to perform the necessary jobs, notably inserting the AI Gateway filter responsible for
// transforming the request and response, etc.
//
// In the matching conditions of the LLMRouteRule, the `x-envoy-ai-gateway-model` header is available
// for describing routing behavior based on the model name. The model name is extracted
@@ -69,6 +71,14 @@ type LLMRouteSpec struct {
// +kubebuilder:validation:Required
// +kubebuilder:validation:MaxItems=128
Rules []LLMRouteRule `json:"rules"`
// FilterConfig is the configuration for the AI Gateway filter inserted in the generated HTTPRoute.
//
// An AI Gateway filter is responsible for transforming the request and response,
// as well as for the routing behavior based on the model name extracted from the request content, etc.
//
// Currently, the filter is implemented only as an external process filter, which might be
// extended to other filter types in the future. See https://github.com/envoyproxy/ai-gateway/issues/90
FilterConfig *LLMRouteFilterConfig `json:"filterConfig,omitempty"`
}
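To make the header-based matching described above concrete, here is a minimal sketch written as if inside the api/v1alpha1 package. It is not part of this commit, and the `Matches` field name on `LLMRouteRule` is an assumption, since only `LLMRouteRuleMatch.Headers` appears in this diff.

```go
// buildModelRule sketches a rule that applies only to requests whose
// x-envoy-ai-gateway-model header equals the given model name.
// NOTE: the Matches field name is an assumption; only LLMRouteRuleMatch.Headers
// is visible in this diff.
func buildModelRule(model string) LLMRouteRule {
	return LLMRouteRule{
		Matches: []LLMRouteRuleMatch{{
			Headers: []gwapiv1.HTTPHeaderMatch{{
				Name:  "x-envoy-ai-gateway-model", // set from the model name in the request content
				Value: model,
			}},
		}},
	}
}
```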

// LLMRouteRule is a rule that defines the routing behavior of the LLMRoute.
@@ -122,6 +132,52 @@ type LLMRouteRuleMatch struct {
Headers []gwapiv1.HTTPHeaderMatch `json:"headers,omitempty"`
}

type LLMRouteFilterConfig struct {
// Type specifies the type of the filter configuration.
//
// Currently, only ExternalProcess is supported, and it is the default.
//
// +kubebuilder:default=ExternalProcess
Type LLMRouteFilterConfigType `json:"type"`

// ExternalProcess is the configuration for the external process filter.
// This is optional; if not set, the default values of the Deployment spec are used.
//
// +optional
ExternalProcess *LLMRouteFilterConfigExternalProcess `json:"externalProcess,omitempty"`
}

// LLMRouteFilterConfigType specifies the type of the filter configuration.
//
// +kubebuilder:validation:Enum=ExternalProcess;DynamicModule
type LLMRouteFilterConfigType string

const (
LLMRouteFilterConfigTypeExternalProcess LLMRouteFilterConfigType = "ExternalProcess"
LLMRouteFilterConfigTypeDynamicModule LLMRouteFilterConfigType = "DynamicModule" // Reserved for https://github.com/envoyproxy/ai-gateway/issues/90
)

type LLMRouteFilterConfigExternalProcess struct {
// Replicas is the number of desired pods of the external process deployment.
//
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// Resources required by the external process container.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
//
// +optional
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
// Image is the image of the external process container.
//
// This defaults to the ghcr.io/envoyproxy/ai-gateway/extproc:${VERSION} image where
// ${VERSION} is the version of the Envoy AI Gateway controller.
//
// +optional
Image string `json:"image,omitempty"`
// TODO: maybe add an option to not deploy the external process filter and let the user deploy it manually?
// Not sure if it is worth it, as we are migrating to dynamic modules.
}
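For illustration, a minimal sketch of the image-defaulting rule described above, written as if inside the same package. `resolveExtProcImage` is a hypothetical helper, not part of this commit; the actual override is done by `applyExtProcDeploymentConfigUpdate` in `internal/controller/llmroute.go` below.

```go
// resolveExtProcImage is a hypothetical helper mirroring the defaulting
// semantics: a user-supplied image takes precedence; otherwise the
// controller's default image is used.
func resolveExtProcImage(defaultImage string, fc *LLMRouteFilterConfig) string {
	if fc != nil && fc.ExternalProcess != nil && fc.ExternalProcess.Image != "" {
		return fc.ExternalProcess.Image
	}
	return defaultImage // e.g. ghcr.io/envoyproxy/ai-gateway/extproc:${VERSION}
}
```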

// +kubebuilder:object:root=true

// LLMBackend is a resource that represents a single backend for LLMRoute.
51 changes: 51 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

49 changes: 34 additions & 15 deletions internal/controller/llmroute.go
@@ -43,12 +43,12 @@ func llmRouteIndexFunc(o client.Object) []string {
//
// This handles the LLMRoute resource and creates the necessary resources for the external process.
type llmRouteController struct {
client client.Client
kube kubernetes.Interface
logger logr.Logger
logLevel string
extProcImage string
eventChan chan ConfigSinkEvent
client client.Client
kube kubernetes.Interface
logger logr.Logger
logLevel string
defaultExtProcImage string
eventChan chan ConfigSinkEvent
}

// NewLLMRouteController creates a new reconcile.TypedReconciler[reconcile.Request] for the LLMRoute resource.
@@ -57,11 +57,11 @@ func NewLLMRouteController(
options Options, ch chan ConfigSinkEvent,
) reconcile.TypedReconciler[reconcile.Request] {
return &llmRouteController{
client: client,
kube: kube,
logger: logger.WithName("llmroute-controller"),
extProcImage: options.ExtProcImage,
eventChan: ch,
client: client,
kube: kube,
logger: logger.WithName("llmroute-controller"),
defaultExtProcImage: options.ExtProcImage,
eventChan: ch,
}
}

@@ -199,7 +199,7 @@ func (c *llmRouteController) reconcileExtProcDeployment(ctx context.Context, llm
Containers: []corev1.Container{
{
Name: name,
Image: c.extProcImage,
Image: c.defaultExtProcImage,
ImagePullPolicy: corev1.PullIfNotPresent,
Ports: []corev1.ContainerPort{{Name: "grpc", ContainerPort: 1063}},
Args: []string{
@@ -225,6 +225,7 @@ func (c *llmRouteController) reconcileExtProcDeployment(ctx context.Context, llm
},
},
}
applyExtProcDeploymentConfigUpdate(&deployment.Spec, llmRoute.Spec.FilterConfig)
_, err = c.kube.AppsV1().Deployments(llmRoute.Namespace).Create(ctx, deployment, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("failed to create deployment: %w", err)
@@ -233,11 +234,13 @@ func (c *llmRouteController) reconcileExtProcDeployment(ctx context.Context, llm
} else {
return fmt.Errorf("failed to get deployment: %w", err)
}
} else {
applyExtProcDeploymentConfigUpdate(&deployment.Spec, llmRoute.Spec.FilterConfig)
if _, err = c.kube.AppsV1().Deployments(llmRoute.Namespace).Update(ctx, deployment, metav1.UpdateOptions{}); err != nil {
return fmt.Errorf("failed to update deployment: %w", err)
}
}

// TODO: reconcile the deployment spec like replicas etc once we have support for it at the CRD level.
_ = deployment

// This is static, so we don't need to update it.
service := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
@@ -276,3 +279,19 @@ func ownerReferenceForLLMRoute(llmRoute *aigv1a1.LLMRoute) []metav1.OwnerReferen
UID: llmRoute.UID,
}}
}

func applyExtProcDeploymentConfigUpdate(d *appsv1.DeploymentSpec, filterConfig *aigv1a1.LLMRouteFilterConfig) {
if filterConfig == nil || filterConfig.ExternalProcess == nil {
return
}
extProc := filterConfig.ExternalProcess
if resource := extProc.Resources; resource != nil {
d.Template.Spec.Containers[0].Resources = *resource
}
if replica := extProc.Replicas; replica != nil {
d.Replicas = replica
}
if image := extProc.Image; image != "" {
d.Template.Spec.Containers[0].Image = image
}
}
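A hedged usage sketch of the helper above, written as if inside internal/controller (the image names are placeholders): only fields set in `FilterConfig.ExternalProcess` override the defaults, so setting `Image` alone leaves `Replicas` untouched.

```go
func exampleApplyOverride() {
	// A spec resembling the controller's default extproc deployment.
	spec := appsv1.DeploymentSpec{
		Template: corev1.PodTemplateSpec{Spec: corev1.PodSpec{
			Containers: []corev1.Container{{Image: "ghcr.io/envoyproxy/ai-gateway/extproc:v0.0.0"}},
		}},
	}
	applyExtProcDeploymentConfigUpdate(&spec, &aigv1a1.LLMRouteFilterConfig{
		ExternalProcess: &aigv1a1.LLMRouteFilterConfigExternalProcess{
			Image: "my-registry/extproc:custom", // placeholder image
		},
	})
	// Now spec.Template.Spec.Containers[0].Image == "my-registry/extproc:custom".
	// spec.Replicas stays nil, so Kubernetes defaults the Deployment to 1 replica.
}
```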
74 changes: 71 additions & 3 deletions internal/controller/llmroute_test.go
@@ -6,9 +6,13 @@ import (

egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
"github.com/stretchr/testify/require"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
fake2 "k8s.io/client-go/kubernetes/fake"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
@@ -53,22 +57,50 @@ func TestLLMRouteController_reconcileExtProcDeployment(t *testing.T) {
c.kube = fake2.NewClientset()

ownerRef := []metav1.OwnerReference{{APIVersion: "v1", Kind: "Kind", Name: "Name"}}
llmRoute := &aigv1a1.LLMRoute{ObjectMeta: metav1.ObjectMeta{Name: "myroute", Namespace: "default"}}
llmRoute := &aigv1a1.LLMRoute{
ObjectMeta: metav1.ObjectMeta{Name: "myroute", Namespace: "default"},
Spec: aigv1a1.LLMRouteSpec{
FilterConfig: &aigv1a1.LLMRouteFilterConfig{
Type: aigv1a1.LLMRouteFilterConfigTypeExternalProcess,
ExternalProcess: &aigv1a1.LLMRouteFilterConfigExternalProcess{
Replicas: ptr.To[int32](123),
Resources: &corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("200m"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
},
},
},
},
},
}

err := c.reconcileExtProcDeployment(context.Background(), llmRoute, ownerRef)
require.NoError(t, err)

deployment, err := c.kube.AppsV1().Deployments("default").Get(context.Background(), extProcName(llmRoute), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, extProcName(llmRoute), deployment.Name)

require.Equal(t, int32(123), *deployment.Spec.Replicas)
require.Equal(t, ownerRef, deployment.OwnerReferences)
require.Equal(t, corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("200m"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
},
}, deployment.Spec.Template.Spec.Containers[0].Resources)
service, err := c.kube.CoreV1().Services("default").Get(context.Background(), extProcName(llmRoute), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, extProcName(llmRoute), service.Name)

// Doing it again should not fail.
// Doing it again should not fail, and should update the deployment.
llmRoute.Spec.FilterConfig.ExternalProcess.Replicas = ptr.To[int32](456)
err = c.reconcileExtProcDeployment(context.Background(), llmRoute, ownerRef)
require.NoError(t, err)
// Check the deployment is updated.
deployment, err = c.kube.AppsV1().Deployments("default").Get(context.Background(), extProcName(llmRoute), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, int32(456), *deployment.Spec.Replicas)
}

func TestLLMRouteController_reconcileExtProcExtensionPolicy(t *testing.T) {
@@ -115,6 +147,42 @@ func TestLLMRouteController_reconcileExtProcExtensionPolicy(t *testing.T) {
}
}

func Test_applyExtProcDeploymentConfigUpdate(t *testing.T) {
dep := &appsv1.DeploymentSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
}
t.Run("not panic", func(t *testing.T) {
applyExtProcDeploymentConfigUpdate(dep, nil)
applyExtProcDeploymentConfigUpdate(dep, &aigv1a1.LLMRouteFilterConfig{})
applyExtProcDeploymentConfigUpdate(dep, &aigv1a1.LLMRouteFilterConfig{
ExternalProcess: &aigv1a1.LLMRouteFilterConfigExternalProcess{},
})
})
t.Run("update", func(t *testing.T) {
req := corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("200m"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
},
}
applyExtProcDeploymentConfigUpdate(dep, &aigv1a1.LLMRouteFilterConfig{
ExternalProcess: &aigv1a1.LLMRouteFilterConfigExternalProcess{
Resources: &req,
Replicas: ptr.To[int32](123),
Image: "some-image",
},
},
)
require.Equal(t, req, dep.Template.Spec.Containers[0].Resources)
require.Equal(t, int32(123), *dep.Replicas)
require.Equal(t, "some-image", dep.Template.Spec.Containers[0].Image)
})
}

func Test_llmRouteIndexFunc(t *testing.T) {
scheme := runtime.NewScheme()
require.NoError(t, aigv1a1.AddToScheme(scheme))
(Diff for the remaining 2 changed files did not load.)
