Commit 6356343
Add deprecation warnings to docstrings (#4083)
Co-authored-by: Quentin Gallouédec <[email protected]>
1 parent 45e59f7 commit 6356343

File tree: 4 files changed, +191 −5 lines

trl/trainer/online_dpo_config.py

Lines changed: 8 additions & 0 deletions

@@ -62,6 +62,14 @@ class may differ from those in [`~transformers.TrainingArguments`].
         dataset_num_proc (`int`, *optional*):
             Number of processes to use for processing the dataset.
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Since OnlineDPO does not involve
+            dataset preparation, you can safely remove it.
+
+            </Deprecated>
+
         disable_dropout (`bool`, *optional*, defaults to `True`):
             Whether to disable dropout in the model and reference model.
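
A hedged migration sketch for this one (assuming the standard `trl` import; the `output_dir` value is illustrative): the only change needed is deleting the argument.

from trl import OnlineDPOConfig

# Before (deprecated since 0.22.0, removal planned for 0.25.0):
# config = OnlineDPOConfig(output_dir="online-dpo", dataset_num_proc=4)

# After: drop the argument entirely; OnlineDPO does no dataset preparation.
config = OnlineDPOConfig(output_dir="online-dpo")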

trl/trainer/online_dpo_trainer.py

Lines changed: 6 additions & 5 deletions

@@ -168,12 +168,13 @@ class OnlineDPOTrainer(Trainer):
         preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
             The function to use to preprocess the logits before computing the metrics.
-        .. deprecated:: 0.22.0
-            The following parameters are deprecated and will be removed in a future version:
+        reward_model:

-            * `reward_model`: Use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`.
-            * `reward_processing_class`: Use `reward_processing_classes` instead. For example, change
-              `reward_processing_class=tokenizer` to `reward_processing_classes=tokenizer`.
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `reward_funcs` instead.
+
+            </Deprecated>
     """

     _tag_names = ["trl", "online-dpo"]

trl/trainer/rloo_config.py

Lines changed: 136 additions & 0 deletions

@@ -190,6 +190,142 @@ class RLOOConfig(TrainingArguments):
         wandb_log_unique_prompts (`bool`, *optional*, defaults to `False`):
             Whether to log unique prompts in wandb. If `True`, only unique prompts are logged. If `False`, all prompts
             are logged.
+
+    > Deprecated parameters
+
+        rloo_k:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `num_generations` instead.
+
+            </Deprecated>
+
+        cliprange:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `epsilon` instead.
+
+            </Deprecated>
+
+        kl_coef:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `beta` instead.
+
+            </Deprecated>
+
+        exp_name:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `run_name` instead.
+
+            </Deprecated>
+
+        normalize_reward:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `normalize_advantages` instead.
+
+            </Deprecated>
+
+        num_ppo_epochs:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `num_iterations` instead.
+
+            </Deprecated>
+
+        num_mini_batches:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `steps_per_generation` instead.
+
+            </Deprecated>
+
+        total_episodes:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `max_steps` instead.
+
+            </Deprecated>
+
+        response_length:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `max_completion_length` instead.
+
+            </Deprecated>
+
+        token_level_kl:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. KL is now computed only at the sequence
+            level.
+
+            </Deprecated>
+
+        dataset_num_proc:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. This parameter was unused, you can
+            safely remove it from your scripts.
+
+            </Deprecated>
+
+        local_rollout_forward_batch_size:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Now it is automatically set to
+            `per_device_train_batch_size` (or `per_device_eval_batch_size` during evaluation).
+
+            </Deprecated>
+
+        num_sample_generations:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `logging_steps` to control
+            generation logging frequency.
+
+            </Deprecated>
+
+        stop_token:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0.
+
+            </Deprecated>
+
+        stop_token_id:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `processing_class.eos_token_id`
+            instead.
+
+            </Deprecated>
+
+        missing_eos_penalty:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Replicate with a custom reward function
+            checking if `eos_token_id` is in `completion_ids`.
+
+            </Deprecated>
     """

     _VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["model_init_kwargs"]
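
For scripts that build RLOOConfig from a dictionary of keyword arguments, a minimal, hedged helper can apply the renames above mechanically (the mapping is taken verbatim from the deprecation notes; `migrate_rloo_kwargs` is a hypothetical helper, not part of TRL):

# Old name -> new name, per the deprecation notes above.
OLD_TO_NEW = {
    "rloo_k": "num_generations",
    "cliprange": "epsilon",
    "kl_coef": "beta",
    "exp_name": "run_name",
    "normalize_reward": "normalize_advantages",
    "num_ppo_epochs": "num_iterations",
    "num_mini_batches": "steps_per_generation",
    "total_episodes": "max_steps",
    "response_length": "max_completion_length",
    "num_sample_generations": "logging_steps",
}

# No direct replacement; drop these and see the notes above.
REMOVED = {"token_level_kl", "dataset_num_proc", "stop_token", "stop_token_id",
           "local_rollout_forward_batch_size", "missing_eos_penalty"}

def migrate_rloo_kwargs(old_kwargs):
    """Rename deprecated RLOOConfig kwargs; drop ones with no replacement."""
    return {OLD_TO_NEW.get(k, k): v for k, v in old_kwargs.items()
            if k not in REMOVED}

print(migrate_rloo_kwargs({"rloo_k": 4, "kl_coef": 0.05, "stop_token": "eos"}))
# -> {'num_generations': 4, 'beta': 0.05}

The `missing_eos_penalty` behaviour can be replicated as a custom reward function, per its note above. A hedged sketch follows; it assumes the trainer passes `completion_ids` through the reward function's keyword arguments, matching the `reward_func(completions, **kwargs)` signature shown in the rloo_trainer.py hunk header below:

def missing_eos_reward(completions, completion_ids=None, eos_token_id=2, **kwargs):
    # Penalize completions that never emit the EOS token. The id 2 is a
    # placeholder; set it from your tokenizer (tokenizer.eos_token_id).
    return [0.0 if eos_token_id in ids else -1.0 for ids in completion_ids]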

trl/trainer/rloo_trainer.py

Lines changed: 41 additions & 0 deletions

@@ -197,6 +197,47 @@ def reward_func(completions, **kwargs):
         model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
         peft_config ([`~peft.PeftConfig`], *optional*):
             PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
+
+        config:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `args` instead.
+
+            </Deprecated>
+
+        reward_model:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `reward_funcs` instead.
+
+            </Deprecated>
+
+        policy:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. Use `model` instead.
+
+            </Deprecated>
+
+        ref_policy:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. To use the initial model as the
+            reference model, simply omit this parameter. The parameter is ignored.
+
+            </Deprecated>
+
+        data_collator:
+
+            <Deprecated version="0.22.0">
+
+            This parameter is deprecated and will be removed in version 0.25.0. The RLOOTrainer does not use a data
+            collator, so this parameter is ignored.
+
+            </Deprecated>
     """

     _tag_names = ["trl", "rloo"]
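
Putting the five renames together, a hedged before/after sketch of the constructor call (placeholder names are assumed to be defined in the calling script; `ref_policy` and `data_collator` simply disappear, since the notes above say both are ignored):

# Before (deprecated since 0.22.0, removal planned for 0.25.0):
# trainer = RLOOTrainer(
#     config=training_args,
#     policy=model,
#     ref_policy=ref_model,        # ignored: the initial model is reused
#     reward_model=reward_model,
#     data_collator=collator,      # ignored: RLOOTrainer uses no collator
#     train_dataset=dataset,
# )

# After:
# trainer = RLOOTrainer(
#     args=training_args,
#     model=model,
#     reward_funcs=reward_model,
#     train_dataset=dataset,
# )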
