From 62366f6fb21800c5cdb00c061fcee141aa6ea6b1 Mon Sep 17 00:00:00 2001
From: zhengkezhou1
Date: Wed, 24 Sep 2025 00:53:14 +0800
Subject: [PATCH] [misc] introduce llm-d vLLM simulator for E2E tests

Signed-off-by: zhengkezhou1
---
 development/simulator/components.yaml    | 76 ++++++++++++++++++++++++
 development/simulator/deployment.yaml    | 54 +++++++++++++++++
 development/simulator/kustomization.yaml |  5 ++
 test/e2e/e2e_test.go                     | 14 +++--
 test/run-e2e-tests.sh                    |  5 +-
 5 files changed, 148 insertions(+), 6 deletions(-)
 create mode 100644 development/simulator/components.yaml
 create mode 100644 development/simulator/deployment.yaml
 create mode 100644 development/simulator/kustomization.yaml

diff --git a/development/simulator/components.yaml b/development/simulator/components.yaml
new file mode 100644
index 000000000..5a6e6e78f
--- /dev/null
+++ b/development/simulator/components.yaml
@@ -0,0 +1,76 @@
+# Debug only: Make sure pod can be visited from controller that deployed in mac.
+apiVersion: v1
+kind: Service
+metadata:
+  name: vllm-llama3-8b-instruct
+  namespace: default
+  labels:
+    prometheus-discovery: "true"
+  annotations:
+    prometheus.io/scrape: "true"
+    prometheus.io/path: "/metrics"
+    prometheus.io/port: "8010"
+spec:
+  selector:
+    model.aibrix.ai/name: "vllm-llama3-8b-instruct"
+  ports:
+    - protocol: TCP
+      name: inference
+      port: 8010
+      targetPort: 8010
+      nodePort: 30082
+  type: NodePort
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: vllm-llama3-8b-instruct-sa
+  namespace: default
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: vllm-llama3-8b-instruct-pod-reader-role
+  namespace: default
+rules:
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: vllm-llama3-8b-instruct-pod-reader-role-binding
+  namespace: default
+subjects:
+  - kind: ServiceAccount
+    name: vllm-llama3-8b-instruct-sa
+    namespace: default
+roleRef:
+  kind: Role
+  name: vllm-llama3-8b-instruct-pod-reader-role
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  namespace: default
+  name: vllm-llama3-8b-instruct-deployment-reader-role
+rules:
+  - apiGroups: ["apps"]
+    resources: ["deployments"]
+    verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: vllm-llama3-8b-instruct-deployment-reader-role-binding
+  namespace: default
+subjects:
+  - kind: ServiceAccount
+    name: vllm-llama3-8b-instruct-sa
+    namespace: default
+roleRef:
+  kind: Role
+  name: vllm-llama3-8b-instruct-deployment-reader-role
+  apiGroup: rbac.authorization.k8s.io
diff --git a/development/simulator/deployment.yaml b/development/simulator/deployment.yaml
new file mode 100644
index 000000000..595f02770
--- /dev/null
+++ b/development/simulator/deployment.yaml
@@ -0,0 +1,54 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vllm-llama3-8b-instruct
+  namespace: default
+  labels:
+    model.aibrix.ai/name: "vllm-llama3-8b-instruct"
+    model.aibrix.ai/port: "8010"
+    adapter.model.aibrix.ai/enabled: "true"
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      model.aibrix.ai/name: "vllm-llama3-8b-instruct"
+      adapter.model.aibrix.ai/enabled: "true"
+      app: vllm-llama3-8b-instruct
+  template:
+    metadata:
+      labels:
+        model.aibrix.ai/name: "vllm-llama3-8b-instruct"
+        model.aibrix.ai/port: "8010"
+        adapter.model.aibrix.ai/enabled: "true"
+        app: vllm-llama3-8b-instruct
+    spec:
+      serviceAccountName: vllm-llama3-8b-instruct-sa
+      containers:
+        - name: llm-engine
+          image: ghcr.io/llm-d/llm-d-inference-sim:v0.5.0
+          imagePullPolicy: IfNotPresent
+          args:
+            - --model
+            - vllm-llama3-8b-instruct
+            - --port
+            - "8010"
+            - --max-loras
+            - "2"
+            - --lora-modules
+            - '{"name": "food-review-1"}'
+          env:
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          ports:
+            - containerPort: 8010
+              name: http
+              protocol: TCP
+          resources:
+            requests:
+              cpu: 10m
diff --git a/development/simulator/kustomization.yaml b/development/simulator/kustomization.yaml
new file mode 100644
index 000000000..d913f7bdb
--- /dev/null
+++ b/development/simulator/kustomization.yaml
@@ -0,0 +1,5 @@
+kind: Kustomization
+
+resources:
+  - deployment.yaml
+  - components.yaml
\ No newline at end of file
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 1105a2926..08f6dc87c 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -27,6 +27,10 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
+const (
+	simModelName = "vllm-llama3-8b-instruct"
+)
+
 func TestBaseModelInference(t *testing.T) {
 	initializeClient(context.Background(), t)
 
@@ -35,12 +39,12 @@ func TestBaseModelInference(t *testing.T) {
 		Prompt: openai.CompletionNewParamsPromptUnion{
 			OfString: openai.String("Say this is a test"),
 		},
-		Model: modelName,
+		Model: simModelName,
 	})
 	if err != nil {
 		t.Fatalf("completions failed: %v", err)
 	}
-	assert.Equal(t, modelName, completion.Model)
+	assert.Equal(t, simModelName, completion.Model)
 	assert.NotEmpty(t, completion.Choices, "completion has no choices returned")
 	assert.NotEmpty(t, completion.Choices[0].Text, "chat completion has no message returned")
 	assert.Greater(t, completion.Usage.CompletionTokens, int64(0), "completion tokens are more than zero")
@@ -49,12 +53,12 @@ func TestBaseModelInference(t *testing.T) {
 		Messages: []openai.ChatCompletionMessageParamUnion{
 			openai.UserMessage("Say this is a test"),
 		},
-		Model: modelName,
+		Model: simModelName,
 	})
 	if err != nil {
 		t.Fatalf("chat completions failed: %v", err)
 	}
-	assert.Equal(t, modelName, chatCompletion.Model)
+	assert.Equal(t, simModelName, chatCompletion.Model)
 	assert.NotEmpty(t, chatCompletion.Choices, "chat completion has no choices returned")
 	assert.NotNil(t, chatCompletion.Choices[0].Message.Content, "chat completion has no message returned")
 }
@@ -82,7 +86,7 @@ func TestBaseModelInferenceFailures(t *testing.T) {
 		{
 			name:            "Invalid Routing Strategy",
 			apiKey:          apiKey,
-			modelName:       modelName,
+			modelName:       simModelName,
 			routingStrategy: "invalid-routing-strategy",
 			expectErrCode:   400,
 		},
diff --git a/test/run-e2e-tests.sh b/test/run-e2e-tests.sh
index 3d1a131b2..b5ffa0460 100755
--- a/test/run-e2e-tests.sh
+++ b/test/run-e2e-tests.sh
@@ -58,6 +58,8 @@ if [ -n "$INSTALL_AIBRIX" ]; then
   make docker-build-all
   kind load docker-image aibrix/controller-manager:nightly aibrix/gateway-plugins:nightly aibrix/metadata-service:nightly aibrix/runtime:nightly
 
+  kubectl apply -k development/simulator
+
   kubectl apply -k config/dependency --server-side
   kubectl apply -k config/test
 
@@ -108,8 +110,9 @@ function cleanup {
     # Clean up k8s resources if INSTALL_AIBRIX is set
    kubectl delete --ignore-not-found=true -k config/test
    kubectl delete --ignore-not-found=true -k config/dependency
+   kubectl delete --ignore-not-found=true -k development/simulator
    cd development/app
-   kubectl delete -k config/mock
+   kubectl delete --ignore-not-found=true -k config/mock
    cd ../..
  else
    echo "Skipping k8s cleanup as INSTALL_AIBRIX is not set"