diff --git a/nemo/collections/asr/models/aed_multitask_models.py b/nemo/collections/asr/models/aed_multitask_models.py
index d20860203e7e..2ff70c34a2fd 100644
--- a/nemo/collections/asr/models/aed_multitask_models.py
+++ b/nemo/collections/asr/models/aed_multitask_models.py
@@ -573,7 +573,6 @@ def transcribe(
                     f"but got {type(override_config)}"
                 )
             trcfg = override_config
-            trcfg.timestamps = timestamps
 
         if trcfg.enable_chunking:
             # Check if only one audio is provided with string
@@ -1099,6 +1098,7 @@ def _transcribe_output_processing(self, outputs, trcfg: MultiTaskTranscriptionCo
                 main_model_predictions=hypotheses,
                 timestamp_type='char' if merge_to_be_done else ['word', 'segment'],
                 viterbi_device=trcfg._internal.device,
+                verbose=trcfg.verbose,
             )
         elif trcfg.timestamps:
             hypotheses = process_aed_timestamp_outputs(
diff --git a/nemo/collections/asr/parts/mixins/transcription.py b/nemo/collections/asr/parts/mixins/transcription.py
index 7a48b99049b3..89f1d76e8172 100644
--- a/nemo/collections/asr/parts/mixins/transcription.py
+++ b/nemo/collections/asr/parts/mixins/transcription.py
@@ -253,7 +253,6 @@ def transcribe(
 
                 - Dict[str, List[str/Hypothesis]]
         """
-
         if override_config is None:
             transcribe_cfg = TranscribeConfig(
                 use_lhotse=use_lhotse,
@@ -348,7 +347,6 @@ def transcribe_generator(self, audio, override_config: Optional[TranscribeConfig
         """
         A generator version of `transcribe` function.
         """
-
         if override_config is None:
             override_config = TranscribeConfig()
 
diff --git a/nemo/collections/asr/parts/utils/aligner_utils.py b/nemo/collections/asr/parts/utils/aligner_utils.py
index 5d2b719c2521..2c6b5f1f5138 100644
--- a/nemo/collections/asr/parts/utils/aligner_utils.py
+++ b/nemo/collections/asr/parts/utils/aligner_utils.py
@@ -878,6 +878,7 @@ def get_batch_variables(
     buffered_chunk_params: dict = {},
     padding_value: float = -3.4e38,
     has_hypotheses: bool = False,
+    verbose: bool = False,
 ):
     """
     Args:
@@ -947,7 +948,9 @@ def get_batch_variables(
                 if has_hypotheses:
                     hypotheses = audio
                 else:
-                    hypotheses = model.transcribe(audio, return_hypotheses=True, batch_size=batch_size)
+                    hypotheses = model.transcribe(
+                        audio, return_hypotheses=True, batch_size=batch_size, verbose=verbose
+                    )
         else:
             assert isinstance(audio, list) or isinstance(
                 audio, str
diff --git a/nemo/collections/asr/parts/utils/timestamp_utils.py b/nemo/collections/asr/parts/utils/timestamp_utils.py
index 6e21fdf08d75..f7f9f84722e4 100644
--- a/nemo/collections/asr/parts/utils/timestamp_utils.py
+++ b/nemo/collections/asr/parts/utils/timestamp_utils.py
@@ -490,6 +490,7 @@ def get_forced_aligned_timestamps_with_external_model(
     supported_punctuation: Optional[Union[Set, List[str]]] = {',', '.', '!', '?'},
     timestamp_type: Optional[Union[str, List[str]]] = "all",
     has_hypotheses: bool = False,
+    verbose: bool = False,
 ) -> List[Hypothesis]:
     """
     Extracts the word, segment and char timestamps by aligning the audio with the external ASR model and adds them to the provided Hypothesis objects.
@@ -649,6 +650,7 @@ def process_timestamps(utt_obj, output_timestep_duration, timestamp_type):
             word_separator=word_separator,
             gt_text_batch=[hyp.text for hyp in main_model_predictions[start_idx:end_idx]],
             has_hypotheses=has_hypotheses,
+            verbose=verbose,
         )
 
         alignments_batch = viterbi_decoding(