@@ -190,6 +190,142 @@ class RLOOConfig(TrainingArguments):
190
190
wandb_log_unique_prompts (`bool`, *optional*, defaults to `False`):
191
191
Whether to log unique prompts in wandb. If `True`, only unique prompts are logged. If `False`, all prompts
192
192
are logged.
193
+
194
+ > Deprecated parameters
195
+
196
+ rloo_k:
197
+
198
+ <Deprecated version="0.22.0">
199
+
200
+ This parameter is deprecated and will be removed in version 0.25.0. Use `num_generations` instead.
201
+
202
+ </Deprecated>
203
+
204
+ cliprange:
205
+
206
+ <Deprecated version="0.22.0">
207
+
208
+ This parameter is deprecated and will be removed in version 0.25.0. Use `epsilon` instead.
209
+
210
+ </Deprecated>
211
+
212
+ kl_coef:
213
+
214
+ <Deprecated version="0.22.0">
215
+
216
+ This parameter is deprecated and will be removed in version 0.25.0. Use `beta` instead.
217
+
218
+ </Deprecated>
219
+
220
+ exp_name:
221
+
222
+ <Deprecated version="0.22.0">
223
+
224
+ This parameter is deprecated and will be removed in version 0.25.0. Use `run_name` instead.
225
+
226
+ </Deprecated>
227
+
228
+ normalize_reward:
229
+
230
+ <Deprecated version="0.22.0">
231
+
232
+ This parameter is deprecated and will be removed in version 0.25.0. Use `normalize_advantages` instead.
233
+
234
+ </Deprecated>
235
+
236
+ num_ppo_epochs:
237
+
238
+ <Deprecated version="0.22.0">
239
+
240
+ This parameter is deprecated and will be removed in version 0.25.0. Use `num_iterations` instead.
241
+
242
+ </Deprecated>
243
+
244
+ num_mini_batches:
245
+
246
+ <Deprecated version="0.22.0">
247
+
248
+ This parameter is deprecated and will be removed in version 0.25.0. Use `steps_per_generation` instead.
249
+
250
+ </Deprecated>
251
+
252
+ total_episodes:
253
+
254
+ <Deprecated version="0.22.0">
255
+
256
+ This parameter is deprecated and will be removed in version 0.25.0. Use `max_steps` instead.
257
+
258
+ </Deprecated>
259
+
260
+ response_length:
261
+
262
+ <Deprecated version="0.22.0">
263
+
264
+ This parameter is deprecated and will be removed in version 0.25.0. Use `max_completion_length` instead.
265
+
266
+ </Deprecated>
267
+
268
+ token_level_kl:
269
+
270
+ <Deprecated version="0.22.0">
271
+
272
+ This parameter is deprecated and will be removed in version 0.25.0. KL is now computed only at the sequence
273
+ level.
274
+
275
+ </Deprecated>
276
+
277
+ dataset_num_proc:
278
+
279
+ <Deprecated version="0.22.0">
280
+
281
+ This parameter is deprecated and will be removed in version 0.25.0. This parameter was unused; you can
282
+ safely remove it from your scripts.
283
+
284
+ </Deprecated>
285
+
286
+ local_rollout_forward_batch_size:
287
+
288
+ <Deprecated version="0.22.0">
289
+
290
+ This parameter is deprecated and will be removed in version 0.25.0. It is now automatically set to
291
+ `per_device_train_batch_size` (or `per_device_eval_batch_size` during evaluation).
292
+
293
+ </Deprecated>
294
+
295
+ num_sample_generations:
296
+
297
+ <Deprecated version="0.22.0">
298
+
299
+ This parameter is deprecated and will be removed in version 0.25.0. Use `logging_steps` to control
300
+ generation logging frequency.
301
+
302
+ </Deprecated>
303
+
304
+ stop_token:
305
+
306
+ <Deprecated version="0.22.0">
307
+
308
+ This parameter is deprecated and will be removed in version 0.25.0.
309
+
310
+ </Deprecated>
311
+
312
+ stop_token_id:
313
+
314
+ <Deprecated version="0.22.0">
315
+
316
+ This parameter is deprecated and will be removed in version 0.25.0. Use `processing_class.eos_token_id`
317
+ instead.
318
+
319
+ </Deprecated>
320
+
321
+ missing_eos_penalty:
322
+
323
+ <Deprecated version="0.22.0">
324
+
325
+ This parameter is deprecated and will be removed in version 0.25.0. To replicate it, use a custom reward
326
+ function that checks whether `eos_token_id` is in `completion_ids`.
327
+
328
+ </Deprecated>
193
329
"""
194
330
195
331
_VALID_DICT_FIELDS = TrainingArguments ._VALID_DICT_FIELDS + ["model_init_kwargs" ]
0 commit comments