envoyproxy · hustxiayang · Nov 6, 2025 · Nov 5, 2025 · Nov 5, 2025 · Nov 6, 2025
@@ -194,8 +194,41 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) handleStreamingResponse(
 		// Convert GCP chunk to OpenAI chunk.
 		openAIChunk := o.convertGCPChunkToOpenAI(chunk)
 
-		// Extract token usage if present in this chunk (typically in the last chunk).
-		if chunk.UsageMetadata != nil {
+		// Serialize to SSE format as expected by OpenAI API.
+		err := serializeOpenAIChatCompletionChunk(*openAIChunk, &newBody)
+		if err != nil {
+			return nil, nil, metrics.TokenUsage{}, "", fmt.Errorf("error marshaling OpenAI chunk: %w", err)
+		}
+
+		if span != nil {
+			span.RecordResponseChunk(openAIChunk)
+		}
+
+		// Extract token usage only in the last chunk.
+		if chunk.UsageMetadata != nil && chunk.UsageMetadata.PromptTokenCount > 0 {
+			// Convert usage to pointer if available.
+			usage := ptr.To(geminiUsageToOpenAIUsage(chunk.UsageMetadata))
+
+			usageChunk := openai.ChatCompletionResponseChunk{
+				ID:      chunk.ResponseID,
+				Created: openai.JSONUNIXTime(chunk.CreateTime),
+				Object:  "chat.completion.chunk",
+				Choices: []openai.ChatCompletionResponseChunkChoice{},
+				// usage is nil for all chunks other than the last chunk
+				Usage: usage,
+				Model: o.requestModel,
+			}
+
+			// Serialize to SSE format as expected by OpenAI API.
+			err := serializeOpenAIChatCompletionChunk(usageChunk, &newBody)
+			if err != nil {
+				return nil, nil, metrics.TokenUsage{}, "", fmt.Errorf("error marshaling OpenAI chunk: %w", err)
+			}
+
+			if span != nil {
+				span.RecordResponseChunk(openAIChunk)
+			}
+
 			if chunk.UsageMetadata.PromptTokenCount >= 0 {
 				tokenUsage.SetInputTokens(uint32(chunk.UsageMetadata.PromptTokenCount)) //nolint:gosec
 			}
@@ -209,16 +242,6 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) handleStreamingResponse(
 				tokenUsage.SetCachedInputTokens(uint32(chunk.UsageMetadata.CachedContentTokenCount)) //nolint:gosec
 			}
 		}
-
-		// Serialize to SSE format as expected by OpenAI API.
-		err := serializeOpenAIChatCompletionChunk(*openAIChunk, &newBody)
-		if err != nil {
-			return nil, nil, metrics.TokenUsage{}, "", fmt.Errorf("error marshaling OpenAI chunk: %w", err)
-		}
-
-		if span != nil {
-			span.RecordResponseChunk(openAIChunk)
-		}
 	}
 
 	if endOfStream {
@@ -381,19 +404,14 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) convertGCPChunkToOpenAI(
 		choices = []openai.ChatCompletionResponseChunkChoice{}
 	}
 
-	// Convert usage to pointer if available.
-	var usage *openai.Usage
-	if chunk.UsageMetadata != nil {
-		usage = ptr.To(geminiUsageToOpenAIUsage(chunk.UsageMetadata))
-	}
-
 	return &openai.ChatCompletionResponseChunk{
 		ID:      chunk.ResponseID,
 		Created: openai.JSONUNIXTime(chunk.CreateTime),
 		Object:  "chat.completion.chunk",
 		Choices: choices,
-		Usage:   usage,
-		Model:   o.requestModel,
+		// usage is nil for all chunks other than the last chunk
+		Usage: nil,
+		Model: o.requestModel,
 	}
 }
 

@@ -969,7 +969,9 @@ func TestOpenAIToGCPVertexAITranslatorV1ChatCompletion_ResponseBody(t *testing.T
 			endOfStream:   true,
 			wantError:     false,
 			wantHeaderMut: nil,
-			wantBodyMut: []byte(`data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
+			wantBodyMut: []byte(`data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk"}
+
+data: {"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
 
 data: [DONE]
 `),
@@ -1178,7 +1180,9 @@ data: {"candidates":[{"content":{"parts":[{"text":"Hello"}]}}],"usageMetadata":{
 			wantHeaderMut: nil,
 			wantBodyMut: []byte(`data: {"choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":{"text":"let me think step by step and reply you."}}}],"object":"chat.completion.chunk"}
 
-data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
+data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk"}
+
+data: {"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
 
 data: [DONE]
 `),

@@ -579,7 +579,9 @@ data: {"id":"msg_123","choices":[{"index":0,"delta":{"content":" you","role":"as
 
 data: {"id":"msg_123","choices":[{"index":0,"delta":{"content":" today","role":"assistant"}}],"created":123,"model":"gemini-1.5-pro","object":"chat.completion.chunk"}
 
-data: {"id":"msg_123","choices":[{"index":0,"delta":{"content":"?","role":"assistant"},"finish_reason":"stop"}],"created":123,"model":"gemini-1.5-pro","object":"chat.completion.chunk","usage":{"prompt_tokens":10,"completion_tokens":7,"total_tokens":17,"completion_tokens_details":{},"prompt_tokens_details":{}}}
+data: {"id":"msg_123","choices":[{"index":0,"delta":{"content":"?","role":"assistant"},"finish_reason":"stop"}],"created":123,"model":"gemini-1.5-pro","object":"chat.completion.chunk"}
+
+data: {"id":"msg_123","created":123,"model":"gemini-1.5-pro","object":"chat.completion.chunk","usage":{"prompt_tokens":10,"completion_tokens":7,"total_tokens":17,"completion_tokens_details":{},"prompt_tokens_details":{}}}
 
 data: [DONE]
 `,