Patch summary: MultiTask AED `transcribe` no longer overwrites `override_config.timestamps`
when the `timestamps` argument is None; a regression test is added that requests timestamps
only through the config object and checks that word-level timestamps are returned.
NOTE(review): line breaks were restored from a single-line copy of this patch; the leading
whitespace of context/removed lines was reconstructed from Python syntax. Confirm it against
the target files (or apply with whitespace tolerance) before relying on it.

diff --git a/nemo/collections/asr/models/aed_multitask_models.py b/nemo/collections/asr/models/aed_multitask_models.py
index 91e5dd2b60c0..2de734600a18 100644
--- a/nemo/collections/asr/models/aed_multitask_models.py
+++ b/nemo/collections/asr/models/aed_multitask_models.py
@@ -569,7 +569,8 @@ def transcribe(
                     f"but got {type(override_config)}"
                 )
             trcfg = override_config
-            trcfg.timestamps = timestamps
+            if timestamps is not None:
+                trcfg.timestamps = timestamps
 
         if trcfg.enable_chunking:
             # Check if only one audio is provided with string
diff --git a/tests/collections/asr/mixins/test_transcription.py b/tests/collections/asr/mixins/test_transcription.py
index d2df529d9778..3c4cf7e2a38a 100644
--- a/tests/collections/asr/mixins/test_transcription.py
+++ b/tests/collections/asr/mixins/test_transcription.py
@@ -579,3 +579,30 @@ def test_transcribe_returns_xattn(self, audio_files, canary_1b_v2):
 
         # Reset the decoding strategy to original
         canary_1b_v2.change_decoding_strategy(orig_decoding_config)
+
+    @pytest.mark.with_downloads()
+    @pytest.mark.unit
+    def test_transcribe_override_config_preserves_timestamps(self, audio_files, canary_1b_v2):
+        canary_1b_v2.eval()
+        audio1, audio2 = audio_files
+
+        config = MultiTaskTranscriptionConfig(
+            batch_size=4,
+            return_hypotheses=True,
+            num_workers=0,
+            verbose=False,
+            prompt={'source_lang': 'en', 'target_lang': 'en'},
+            enable_chunking=False,
+            timestamps=True,
+        )
+
+        output = canary_1b_v2.transcribe([audio1, audio2], override_config=config)
+
+        assert len(output) == 2
+        assert isinstance(output[0], Hypothesis)
+        assert isinstance(output[1], Hypothesis)
+
+        assert output[0].timestamp is not None
+        assert output[1].timestamp is not None
+        assert 'word' in output[0].timestamp
+        assert 'word' in output[1].timestamp