We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 435fa95, commit 6e4cea1. Copy full SHA for 6e4cea1
vllm/entrypoints/utils.py
@@ -26,6 +26,11 @@ async def listen_for_disconnect(request: Request) -> None:
26
while True:
27
message = await request.receive()
28
if message["type"] == "http.disconnect":
29
+ if request.app.state.enable_server_load_tracking:
30
+ # on timeout/cancellation the BackgroundTask in load_aware_call
31
+ # cannot decrement the server load metrics.
32
+ # Must be decremented by with_cancellation instead.
33
+ request.app.state.server_load_metrics -= 1
34
break
35
36
0 commit comments