
Commit 816ac61

🪪 Update SFTTrainer to handle labels correctly and add configuration example in paper index (#4051)
Parent: 373a64a

2 files changed: +20 -2 lines


‎docs/source/paper_index.md‎

Lines changed: 13 additions & 0 deletions

@@ -470,6 +470,19 @@ training_args = SFTConfig(
 )
 ```
 
+To closely match the paper’s setup, you can use the following configuration (see Sec. 4.1). The authors also mention that the hyperparameters are not very sensitive (Sec. 4.3):
+
+```python
+SFTConfig(
+    loss_type="dft",
+    learning_rate=5e-5,
+    max_length=2048,
+    # Target batch size 256; achieved via per-device batch 8 * grad accumulation 32
+    per_device_train_batch_size=8,
+    gradient_accumulation_steps=32,
+)
+```
+
 ## Reinforce Leave-One-Out
 
 Papers relating to the [`RLOOTrainer`]
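
For context, here is a minimal sketch of how the configuration added above could be dropped into a training run with TRL's `SFTTrainer`. The model checkpoint and dataset (`Qwen/Qwen2.5-0.5B`, `trl-lib/Capybara`) are illustrative placeholders, not part of this commit:

```python
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

# Placeholder dataset for illustration only; swap in the paper's actual data.
dataset = load_dataset("trl-lib/Capybara", split="train")

training_args = SFTConfig(
    loss_type="dft",
    learning_rate=5e-5,
    max_length=2048,
    # Target batch size 256; achieved via per-device batch 8 * grad accumulation 32
    per_device_train_batch_size=8,
    gradient_accumulation_steps=32,
)

trainer = SFTTrainer(
    model="Qwen/Qwen2.5-0.5B",  # placeholder model id
    args=training_args,
    train_dataset=dataset,
)
trainer.train()
```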

‎trl/trainer/sft_trainer.py‎

Lines changed: 7 additions & 2 deletions

@@ -1093,6 +1093,11 @@ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
         Compute training loss and additionally compute token accuracies
         """
         mode = "train" if self.model.training else "eval"
+
+        # Set aside labels as it will be dropped by super().compute_loss() if a custom `compute_loss_func` is used.
+        # This can be removed when this issue is fixed.
+        labels = inputs["labels"]
+
         # If not set, defaults from model config and may warn since cache isn't compatible with gradient checkpointing
         inputs["use_cache"] = False
         (loss, outputs) = super().compute_loss(
@@ -1137,7 +1142,7 @@ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
         self._metrics[mode]["num_tokens"] = [self._total_train_tokens]
 
         # Compute token accuracy if we have labels and if the model is not using Liger (no logits)
-        if "labels" in inputs and not self.args.use_liger_kernel:
+        if not self.args.use_liger_kernel:
             with torch.no_grad():
                 if "shift_labels" in inputs:
                     # When using CP, labels are pre-shifted. We must use these (and cannot manually shift) because:
@@ -1147,7 +1152,7 @@ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
                     shift_labels = inputs["shift_labels"]
                 else:
                     shift_logits = outputs.logits[..., :-1, :].contiguous()
-                    shift_labels = inputs["labels"][..., 1:].contiguous()
+                    shift_labels = labels[..., 1:].contiguous()
 
                 # When using Prompt Tuning, skip the virtual tokens in logits before accuracy computation, since they do
                 # not correspond to actual input labels.
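
The token-accuracy code that consumes the set-aside `labels` is only partially visible in the hunks above. As a rough, self-contained sketch of the underlying idea (ignoring context parallelism, Liger kernels, and prompt tuning, and using a hypothetical helper name), the shift-by-one accuracy computation looks like this:

```python
import torch


def next_token_accuracy(logits: torch.Tensor, labels: torch.Tensor, ignore_index: int = -100) -> float:
    """Sketch: fraction of positions where argmax(logits) predicts the next token.

    Mirrors the shift pattern in the diff: logits at position t are compared against
    the label at position t + 1, and masked labels (ignore_index) are skipped.
    """
    shift_logits = logits[..., :-1, :].contiguous()  # drop the prediction for the last position
    shift_labels = labels[..., 1:].contiguous()      # drop the first label

    predictions = shift_logits.argmax(dim=-1)
    mask = shift_labels != ignore_index              # ignore prompt/padding tokens

    correct = ((predictions == shift_labels) & mask).sum()
    total = mask.sum()
    return (correct / total).item() if total > 0 else 0.0


# Tiny usage example: batch of 2 sequences, length 5, vocab size 10
logits = torch.randn(2, 5, 10)
labels = torch.randint(0, 10, (2, 5))
labels[:, :2] = -100  # e.g. prompt tokens masked out of the loss
print(next_token_accuracy(logits, labels))
```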
