From 62366f6fb21800c5cdb00c061fcee141aa6ea6b1 Mon Sep 17 00:00:00 2001
From: zhengkezhou1
Date: Wed, 24 Sep 2025 00:53:14 +0800
Subject: [PATCH] [misc] introduce llm-d vLLM simulator for E2E tests

Signed-off-by: zhengkezhou1
---
 development/simulator/components.yaml    | 76 ++++++++++++++++++++++++
 development/simulator/deployment.yaml    | 54 +++++++++++++++++
 development/simulator/kustomization.yaml |  5 ++
 test/e2e/e2e_test.go                     | 14 +++--
 test/run-e2e-tests.sh                    |  5 +-
 5 files changed, 148 insertions(+), 6 deletions(-)
 create mode 100644 development/simulator/components.yaml
 create mode 100644 development/simulator/deployment.yaml
 create mode 100644 development/simulator/kustomization.yaml

diff --git a/development/simulator/components.yaml b/development/simulator/components.yaml
new file mode 100644
index 000000000..5a6e6e78f
--- /dev/null
+++ b/development/simulator/components.yaml
@@ -0,0 +1,76 @@
+# Debug only: Make sure pod can be visited from controller that deployed in mac.
+apiVersion: v1
+kind: Service
+metadata:
+  name: vllm-llama3-8b-instruct
+  namespace: default
+  labels:
+    prometheus-discovery: "true"
+  annotations:
+    prometheus.io/scrape: "true"
+    prometheus.io/path: "/metrics"
+    prometheus.io/port: "8010"
+spec:
+  selector:
+    model.aibrix.ai/name: "vllm-llama3-8b-instruct"
+  ports:
+    - protocol: TCP
+      name: inference
+      port: 8010
+      targetPort: 8010
+      nodePort: 30082
+  type: NodePort
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: vllm-llama3-8b-instruct-sa
+  namespace: default
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: vllm-llama3-8b-instruct-pod-reader-role
+  namespace: default
+rules:
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: vllm-llama3-8b-instruct-pod-reader-role-binding
+  namespace: default
+subjects:
+  - kind: ServiceAccount
+    name: vllm-llama3-8b-instruct-sa
+    namespace: default
+roleRef:
+  kind: Role
+  name: vllm-llama3-8b-instruct-pod-reader-role
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  namespace: default
+  name: vllm-llama3-8b-instruct-deployment-reader-role
+rules:
+  - apiGroups: ["apps"]
+    resources: ["deployments"]
+    verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: vllm-llama3-8b-instruct-deployment-reader-role-binding
+  namespace: default
+subjects:
+  - kind: ServiceAccount
+    name: vllm-llama3-8b-instruct-sa
+    namespace: default
+roleRef:
+  kind: Role
+  name: vllm-llama3-8b-instruct-deployment-reader-role
+  apiGroup: rbac.authorization.k8s.io
diff --git a/development/simulator/deployment.yaml b/development/simulator/deployment.yaml
new file mode 100644
index 000000000..595f02770
--- /dev/null
+++ b/development/simulator/deployment.yaml
@@ -0,0 +1,54 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vllm-llama3-8b-instruct
+  namespace: default
+  labels:
+    model.aibrix.ai/name: "vllm-llama3-8b-instruct"
+    model.aibrix.ai/port: "8010"
+    adapter.model.aibrix.ai/enabled: "true"
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      model.aibrix.ai/name: "vllm-llama3-8b-instruct"
+      adapter.model.aibrix.ai/enabled: "true"
+      app: vllm-llama3-8b-instruct
+  template:
+    metadata:
+      labels:
+        model.aibrix.ai/name: "vllm-llama3-8b-instruct"
+        model.aibrix.ai/port: "8010"
+        adapter.model.aibrix.ai/enabled: "true"
+        app: vllm-llama3-8b-instruct
+    spec:
+      serviceAccountName: vllm-llama3-8b-instruct-sa
+      containers:
+        - name: llm-engine
+          image: ghcr.io/llm-d/llm-d-inference-sim:v0.5.0
+          imagePullPolicy: IfNotPresent
+          args:
+            - --model
+            - vllm-llama3-8b-instruct
+            - --port
+            - "8010"
+            - --max-loras
+            - "2"
+            - --lora-modules
+            - '{"name": "food-review-1"}'
+          env:
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          ports:
+            - containerPort: 8010
+              name: http
+              protocol: TCP
+          resources:
+            requests:
+              cpu: 10m
diff --git a/development/simulator/kustomization.yaml b/development/simulator/kustomization.yaml
new file mode 100644
index 000000000..d913f7bdb
--- /dev/null
+++ b/development/simulator/kustomization.yaml
@@ -0,0 +1,5 @@
+kind: Kustomization
+
+resources:
+  - deployment.yaml
+  - components.yaml
\ No newline at end of file
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 1105a2926..08f6dc87c 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -27,6 +27,10 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
+const (
+	simModelName = "vllm-llama3-8b-instruct"
+)
+
 func TestBaseModelInference(t *testing.T) {
 	initializeClient(context.Background(), t)
 
@@ -35,12 +39,12 @@ func TestBaseModelInference(t *testing.T) {
 		Prompt: openai.CompletionNewParamsPromptUnion{
 			OfString: openai.String("Say this is a test"),
 		},
-		Model: modelName,
+		Model: simModelName,
 	})
 	if err != nil {
 		t.Fatalf("completions failed: %v", err)
 	}
-	assert.Equal(t, modelName, completion.Model)
+	assert.Equal(t, simModelName, completion.Model)
 	assert.NotEmpty(t, completion.Choices, "completion has no choices returned")
 	assert.NotEmpty(t, completion.Choices[0].Text, "chat completion has no message returned")
 	assert.Greater(t, completion.Usage.CompletionTokens, int64(0), "completion tokens are more than zero")
@@ -49,12 +53,12 @@ func TestBaseModelInference(t *testing.T) {
 		Messages: []openai.ChatCompletionMessageParamUnion{
 			openai.UserMessage("Say this is a test"),
 		},
-		Model: modelName,
+		Model: simModelName,
 	})
 	if err != nil {
 		t.Fatalf("chat completions failed: %v", err)
 	}
-	assert.Equal(t, modelName, chatCompletion.Model)
+	assert.Equal(t, simModelName, chatCompletion.Model)
 	assert.NotEmpty(t, chatCompletion.Choices, "chat completion has no choices returned")
 	assert.NotNil(t, chatCompletion.Choices[0].Message.Content, "chat completion has no message returned")
 }
@@ -82,7 +86,7 @@ func TestBaseModelInferenceFailures(t *testing.T) {
 		{
 			name:            "Invalid Routing Strategy",
 			apiKey:          apiKey,
-			modelName:       modelName,
+			modelName:       simModelName,
 			routingStrategy: "invalid-routing-strategy",
 			expectErrCode:   400,
 		},
diff --git a/test/run-e2e-tests.sh b/test/run-e2e-tests.sh
index 3d1a131b2..b5ffa0460 100755
--- a/test/run-e2e-tests.sh
+++ b/test/run-e2e-tests.sh
@@ -58,6 +58,8 @@ if [ -n "$INSTALL_AIBRIX" ]; then
   make docker-build-all
   kind load docker-image aibrix/controller-manager:nightly aibrix/gateway-plugins:nightly aibrix/metadata-service:nightly aibrix/runtime:nightly
 
+  kubectl apply -k development/simulator
+
   kubectl apply -k config/dependency --server-side
   kubectl apply -k config/test
 
@@ -108,8 +110,9 @@ function cleanup {
     # Clean up k8s resources if INSTALL_AIBRIX is set
    kubectl delete --ignore-not-found=true -k config/test
    kubectl delete --ignore-not-found=true -k config/dependency
+   kubectl delete --ignore-not-found=true -k development/simulator
    cd development/app
-   kubectl delete -k config/mock
+   kubectl delete --ignore-not-found=true -k config/mock
    cd ../..
  else
    echo "Skipping k8s cleanup as INSTALL_AIBRIX is not set"