Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions python/sglang/srt/entrypoints/grpc_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from typing import AsyncIterator, Dict, Optional, Tuple

import grpc
from google.protobuf.json_format import MessageToDict
from grpc_reflection.v1alpha import reflection

from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST, DisaggregationMode
Expand Down Expand Up @@ -483,28 +484,40 @@ def _convert_sampling_params(
elif grpc_params.HasField("structural_tag"):
structural_tag = grpc_params.structural_tag

# Convert logit_bias from proto map to dict
logit_bias = dict(grpc_params.logit_bias) if grpc_params.logit_bias else None

# Convert custom_params from proto Struct to dict
custom_params = None
if grpc_params.HasField("custom_params"):
custom_params = MessageToDict(grpc_params.custom_params)

return SGLSamplingParams(
temperature=grpc_params.temperature or 1.0,
top_p=grpc_params.top_p or 1.0,
top_k=grpc_params.top_k or -1,
min_p=grpc_params.min_p or 0.0,
frequency_penalty=grpc_params.frequency_penalty or 0.0,
presence_penalty=grpc_params.presence_penalty or 0.0,
repetition_penalty=grpc_params.repetition_penalty or 1.0,
max_new_tokens=grpc_params.max_new_tokens or 128,
min_new_tokens=grpc_params.min_new_tokens or 0,
stop=list(grpc_params.stop) if grpc_params.stop else [],
temperature=grpc_params.temperature,
top_p=grpc_params.top_p,
top_k=grpc_params.top_k,
min_p=grpc_params.min_p,
frequency_penalty=grpc_params.frequency_penalty,
presence_penalty=grpc_params.presence_penalty,
repetition_penalty=grpc_params.repetition_penalty,
max_new_tokens=grpc_params.max_new_tokens,
min_new_tokens=grpc_params.min_new_tokens,
stop=list(grpc_params.stop) if grpc_params.stop else None,
stop_token_ids=(
list(grpc_params.stop_token_ids) if grpc_params.stop_token_ids else []
list(grpc_params.stop_token_ids) if grpc_params.stop_token_ids else None
),
skip_special_tokens=grpc_params.skip_special_tokens,
spaces_between_special_tokens=grpc_params.spaces_between_special_tokens,
no_stop_trim=grpc_params.no_stop_trim,
regex=regex,
json_schema=json_schema,
ebnf=ebnf_grammar,
structural_tag=structural_tag,
n=grpc_params.n or 1,
n=grpc_params.n,
ignore_eos=grpc_params.ignore_eos,
stream_interval=grpc_params.stream_interval,
logit_bias=logit_bias,
custom_params=custom_params,
)

def _convert_output_logprobs_to_proto(
Expand Down
25 changes: 12 additions & 13 deletions python/sglang/srt/grpc/sglang_scheduler.proto
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ service SglangScheduler {
// =====================

// Sampling parameters matching SGLang's SamplingParams
//
// IMPORTANT: Do not use SamplingParams::default() directly!
// The proto3 defaults (0 for numeric fields) do NOT match the semantic defaults
// (temperature=1.0, top_p=1.0, top_k=-1, etc.). Always construct with explicit values
// or use the conversion functions in sglang_scheduler.rs / grpc_server.py.
message SamplingParams {
float temperature = 1;
float top_p = 2;
Expand All @@ -50,24 +55,18 @@ message SamplingParams {
string structural_tag = 16;
}

// LoRA adapter
string lora_path = 17;

// Parallel sampling
int32 n = 18; // Number of samples

// Token healing
bool token_healing = 19;
int32 n = 17; // Number of samples

// Additional parameters
int32 min_new_tokens = 20;
bool ignore_eos = 21;
bool no_stop_trim = 22;
int32 stream_interval = 23;
map<string, float> logit_bias = 24;
int32 min_new_tokens = 18;
bool ignore_eos = 19;
bool no_stop_trim = 20;
optional int32 stream_interval = 21;
map<string, float> logit_bias = 22;

// Custom parameters for extensibility
google.protobuf.Struct custom_params = 25;
google.protobuf.Struct custom_params = 23;
}


Expand Down
Loading
Loading