You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: trl/trainer/grpo_config.py
+10Lines changed: 10 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -136,6 +136,8 @@ class GRPOConfig(TrainingArguments):
136
136
Number of iterations per batch (denoted as μ in the algorithm).
137
137
epsilon (`float`, *optional*, defaults to `0.2`):
138
138
Epsilon value for clipping.
139
+
delta (`float` or `None`, *optional*, defaults to `None`):
140
+
Delta value for the upper clipping bound in two-sided GRPO. If `None` (default), the standard GRPO clipping is used. When set, it is recommended to be greater than `1 + epsilon`. This method was introduced in the [INTELLECT-2 tech report](https://huggingface.co/papers/2505.07291).
139
141
epsilon_high (`float` or `None`, *optional*, defaults to `None`):
140
142
Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the lower-bound
141
143
specified in argument `epsilon`. Paper [DAPO](https://huggingface.co/papers/2503.14476) recommends `0.28`.
@@ -389,6 +391,12 @@ class GRPOConfig(TrainingArguments):
389
391
default=0.2,
390
392
metadata={"help": "Epsilon value for clipping."},
391
393
)
394
+
delta: Optional[float] = field(
395
+
default=None,
396
+
metadata={
397
+
"help": "If set to a float value (e.g., 2.0), enables the upper clipping bound in two-sided GRPO loss. If None (default), the standard GRPO clipping is used. Recommended to be > 1 + epsilon when enabled."
398
+
},
399
+
)
392
400
epsilon_high: Optional[float] = field(
393
401
default=None,
394
402
metadata={
@@ -536,3 +544,5 @@ def __post_init__(self):
536
544
"current global eval batch size, the valid values for the number of generations are: "
537
545
f"{possible_values}."
538
546
)
547
+
if self.delta is not None and self.use_liger_loss:
548
+
raise ValueError("Liger loss does not support two-sided GRPO loss yet.")
0 commit comments