Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion nemo/collections/asr/models/aed_multitask_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,6 @@ def transcribe(
f"but got {type(override_config)}"
)
trcfg = override_config
trcfg.timestamps = timestamps

if trcfg.enable_chunking:
# Check if only one audio is provided with string
Expand Down Expand Up @@ -1099,6 +1098,7 @@ def _transcribe_output_processing(self, outputs, trcfg: MultiTaskTranscriptionCo
main_model_predictions=hypotheses,
timestamp_type='char' if merge_to_be_done else ['word', 'segment'],
viterbi_device=trcfg._internal.device,
verbose=trcfg.verbose,
)
elif trcfg.timestamps:
hypotheses = process_aed_timestamp_outputs(
Expand Down
2 changes: 0 additions & 2 deletions nemo/collections/asr/parts/mixins/transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,6 @@ def transcribe(

- Dict[str, List[str/Hypothesis]]
"""

if override_config is None:
transcribe_cfg = TranscribeConfig(
use_lhotse=use_lhotse,
Expand Down Expand Up @@ -348,7 +347,6 @@ def transcribe_generator(self, audio, override_config: Optional[TranscribeConfig
"""
A generator version of `transcribe` function.
"""

if override_config is None:
override_config = TranscribeConfig()

Expand Down
5 changes: 4 additions & 1 deletion nemo/collections/asr/parts/utils/aligner_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,7 @@ def get_batch_variables(
buffered_chunk_params: dict = {},
padding_value: float = -3.4e38,
has_hypotheses: bool = False,
verbose: bool = False,
):
"""
Args:
Expand Down Expand Up @@ -947,7 +948,9 @@ def get_batch_variables(
if has_hypotheses:
hypotheses = audio
else:
hypotheses = model.transcribe(audio, return_hypotheses=True, batch_size=batch_size)
hypotheses = model.transcribe(
audio, return_hypotheses=True, batch_size=batch_size, verbose=verbose
)
else:
assert isinstance(audio, list) or isinstance(
audio, str
Expand Down
2 changes: 2 additions & 0 deletions nemo/collections/asr/parts/utils/timestamp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ def get_forced_aligned_timestamps_with_external_model(
supported_punctuation: Optional[Union[Set, List[str]]] = {',', '.', '!', '?'},
timestamp_type: Optional[Union[str, List[str]]] = "all",
has_hypotheses: bool = False,
verbose: bool = False,
) -> List[Hypothesis]:
"""
Extracts the word, segment and char timestamps by aligning the audio with the external ASR model and adds them to the provided Hypothesis objects.
Expand Down Expand Up @@ -649,6 +650,7 @@ def process_timestamps(utt_obj, output_timestep_duration, timestamp_type):
word_separator=word_separator,
gt_text_batch=[hyp.text for hyp in main_model_predictions[start_idx:end_idx]],
has_hypotheses=has_hypotheses,
verbose=verbose,
)

alignments_batch = viterbi_decoding(
Expand Down
Loading