@@ -1015,6 +1015,29 @@ def dpo_loss(
        Log probabilities of the reference model for the chosen responses. Shape: `(batch_size,)`.
    ref_rejected_logps (`torch.FloatTensor`):
        Log probabilities of the reference model for the rejected responses. Shape: `(batch_size,)`.
+   loss_type (`str`, defaults to `"sigmoid"`):
+       The type of loss to compute. One of:
+       - `"sigmoid"`: Sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper.
+       - `"hinge"`: Hinge loss on the normalized likelihood from the
+         [SLiC](https://huggingface.co/papers/2305.10425) paper.
+       - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper.
+       - `"exo_pair"`: Pairwise EXO loss from the [EXO](https://huggingface.co/papers/2402.00856) paper.
+       - `"nca_pair"`: Pairwise NCA loss from the [NCA](https://huggingface.co/papers/2402.05369) paper.
+       - `"robust"`: Unbiased estimate of the DPO loss that is robust to preference noise from the [Robust
+         DPO](https://huggingface.co/papers/2403.00409) paper.
+       - `"bco_pair"`: Pairwise BCO loss from the [BCO](https://huggingface.co/papers/2404.04656) paper.
+       - `"sppo_hard"`: SPPO loss with hard label from the [SPPO](https://huggingface.co/papers/2405.00675)
+         paper.
+       - `"aot"`: AOT loss for paired datasets from the [AOT](https://huggingface.co/papers/2406.05882) paper.
+       - `"aot_pair"`: AOT loss for unpaired datasets from the [AOT](https://huggingface.co/papers/2406.05882)
+         paper.
+       - `"discopop"`: DiscoPOP (a.k.a. Log-Ratio Modulated Loss, LRML) loss from the
+         [DiscoPOP](https://huggingface.co/papers/2406.08414) paper.
+       - `"apo_zero"`: APO-zero loss from the [APO](https://huggingface.co/papers/2408.06266) paper.
+       - `"apo_down"`: APO-down loss from the [APO](https://huggingface.co/papers/2408.06266) paper.
+       - `"sft"`: Negative log-likelihood loss (standard supervised fine-tuning loss).
+   model_output (`dict[str, torch.FloatTensor]`, *optional*):
+       The output of the model's forward pass. This is used to compute auxiliary losses if enabled.

Returns:
    A tuple of three tensors: `(losses, chosen_rewards, rejected_rewards)`. The losses tensor contains the DPO
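For orientation, below is a minimal sketch of what the default `"sigmoid"` branch computes from these log-probability arguments, following the standard DPO formulation; the other `loss_type` options replace the `-logsigmoid` term with different functions of the same log-ratio margin. The helper name `sigmoid_dpo_loss_sketch` and the `beta` temperature are illustrative assumptions, not identifiers from this diff.

```python
import torch
import torch.nn.functional as F


def sigmoid_dpo_loss_sketch(
    chosen_logps: torch.FloatTensor,        # log pi_theta(y_chosen | x), shape (batch_size,)
    rejected_logps: torch.FloatTensor,      # log pi_theta(y_rejected | x), shape (batch_size,)
    ref_chosen_logps: torch.FloatTensor,    # log pi_ref(y_chosen | x), shape (batch_size,)
    ref_rejected_logps: torch.FloatTensor,  # log pi_ref(y_rejected | x), shape (batch_size,)
    beta: float = 0.1,                      # illustrative temperature; not defined in this diff
):
    # Implicit rewards are beta-scaled log-ratios between the policy and the reference model.
    chosen_rewards = beta * (chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (rejected_logps - ref_rejected_logps)

    # Sigmoid (Bradley-Terry) DPO objective: -log sigmoid(reward margin), per example.
    losses = -F.logsigmoid(chosen_rewards - rejected_rewards)

    # Mirrors the documented return contract: (losses, chosen_rewards, rejected_rewards).
    return losses, chosen_rewards.detach(), rejected_rewards.detach()
```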