Skip to content

Commit c5efdb7

Browse files
CatherineSuech-tiger1
authored and committed
[router][grpc] Fix proto3 default value mismatches and cleanup unused fields (sgl-project#11283)
1 parent 83ed57b commit c5efdb7

File tree

6 files changed

+153
-117
lines changed

6 files changed

+153
-117
lines changed

python/sglang/srt/entrypoints/grpc_server.py

Lines changed: 39 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from typing import AsyncIterator, Dict, Optional, Tuple
1515

1616
import grpc
17+
from google.protobuf.json_format import MessageToDict
1718
from grpc_reflection.v1alpha import reflection
1819

1920
from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST, DisaggregationMode
@@ -483,28 +484,52 @@ def _convert_sampling_params(
483484
elif grpc_params.HasField("structural_tag"):
484485
structural_tag = grpc_params.structural_tag
485486

487+
# Handle optional parameters conversion
488+
custom_params = (
489+
MessageToDict(grpc_params.custom_params)
490+
if grpc_params.HasField("custom_params")
491+
else None
492+
)
493+
max_new_tokens = (
494+
grpc_params.max_new_tokens
495+
if grpc_params.HasField("max_new_tokens")
496+
else None
497+
)
498+
stream_interval = (
499+
grpc_params.stream_interval
500+
if grpc_params.HasField("stream_interval")
501+
else None
502+
)
503+
logit_bias = dict(grpc_params.logit_bias) if grpc_params.logit_bias else None
504+
stop = list(grpc_params.stop) if grpc_params.stop else None
505+
stop_token_ids = (
506+
list(grpc_params.stop_token_ids) if grpc_params.stop_token_ids else None
507+
)
508+
486509
return SGLSamplingParams(
487-
temperature=grpc_params.temperature or 1.0,
488-
top_p=grpc_params.top_p or 1.0,
489-
top_k=grpc_params.top_k or -1,
490-
min_p=grpc_params.min_p or 0.0,
491-
frequency_penalty=grpc_params.frequency_penalty or 0.0,
492-
presence_penalty=grpc_params.presence_penalty or 0.0,
493-
repetition_penalty=grpc_params.repetition_penalty or 1.0,
494-
max_new_tokens=grpc_params.max_new_tokens or 128,
495-
min_new_tokens=grpc_params.min_new_tokens or 0,
496-
stop=list(grpc_params.stop) if grpc_params.stop else [],
497-
stop_token_ids=(
498-
list(grpc_params.stop_token_ids) if grpc_params.stop_token_ids else []
499-
),
510+
temperature=grpc_params.temperature,
511+
top_p=grpc_params.top_p,
512+
top_k=grpc_params.top_k,
513+
min_p=grpc_params.min_p,
514+
frequency_penalty=grpc_params.frequency_penalty,
515+
presence_penalty=grpc_params.presence_penalty,
516+
repetition_penalty=grpc_params.repetition_penalty,
517+
max_new_tokens=max_new_tokens,
518+
min_new_tokens=grpc_params.min_new_tokens,
519+
stop=stop,
520+
stop_token_ids=stop_token_ids,
500521
skip_special_tokens=grpc_params.skip_special_tokens,
501522
spaces_between_special_tokens=grpc_params.spaces_between_special_tokens,
523+
no_stop_trim=grpc_params.no_stop_trim,
502524
regex=regex,
503525
json_schema=json_schema,
504526
ebnf=ebnf_grammar,
505527
structural_tag=structural_tag,
506-
n=grpc_params.n or 1,
528+
n=grpc_params.n,
507529
ignore_eos=grpc_params.ignore_eos,
530+
stream_interval=stream_interval,
531+
logit_bias=logit_bias,
532+
custom_params=custom_params,
508533
)
509534

510535
def _convert_output_logprobs_to_proto(

python/sglang/srt/grpc/sglang_scheduler.proto

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,11 @@ service SglangScheduler {
2727
// =====================
2828

2929
// Sampling parameters matching SGLang's SamplingParams
30+
//
31+
// IMPORTANT: Do not use SamplingParams::default() directly!
32+
// The proto3 defaults (0 for numeric fields) do NOT match the semantic defaults
33+
// (temperature=1.0, top_p=1.0, top_k=-1, etc.). Always construct with explicit values
34+
// or use the conversion functions in sglang_scheduler.rs / grpc_server.py.
3035
message SamplingParams {
3136
float temperature = 1;
3237
float top_p = 2;
@@ -50,24 +55,18 @@ message SamplingParams {
5055
string structural_tag = 16;
5156
}
5257

53-
// LoRA adapter
54-
string lora_path = 17;
55-
5658
// Speculative decoding
57-
int32 n = 18; // Number of samples
58-
59-
// Token healing
60-
bool token_healing = 19;
59+
int32 n = 17; // Number of samples
6160

6261
// Additional parameters
63-
int32 min_new_tokens = 20;
64-
bool ignore_eos = 21;
65-
bool no_stop_trim = 22;
66-
int32 stream_interval = 23;
67-
map<string, float> logit_bias = 24;
62+
int32 min_new_tokens = 18;
63+
bool ignore_eos = 19;
64+
bool no_stop_trim = 20;
65+
optional int32 stream_interval = 21;
66+
map<string, float> logit_bias = 22;
6867

6968
// Custom parameters for extensibility
70-
google.protobuf.Struct custom_params = 25;
69+
google.protobuf.Struct custom_params = 23;
7170
}
7271

7372

0 commit comments

Comments
 (0)