@@ -6652,8 +6652,8 @@ static bool whisper_vad(
6652
6652
6653
6653
if (vad_segments->data .size () > 0 ) {
6654
6654
state->has_vad_segments = true ;
6655
- ctx-> state ->vad_segments .clear ();
6656
- ctx-> state ->vad_segments .reserve (vad_segments->data .size ());
6655
+ state->vad_segments .clear ();
6656
+ state->vad_segments .reserve (vad_segments->data .size ());
6657
6657
6658
6658
// Initialize the time mapping table
6659
6659
state->vad_mapping_table .clear ();
@@ -6749,7 +6749,7 @@ static bool whisper_vad(
6749
6749
6750
6750
WHISPER_LOG_INFO (" %s: vad_segment_info: orig_start: %.2f, orig_end: %.2f, vad_start: %.2f, vad_end: %.2f\n " ,
6751
6751
__func__, segment.orig_start /100.0 , segment.orig_end /100.0 , segment.vad_start /100.0 , segment.vad_end /100.0 );
6752
- ctx-> state ->vad_segments .push_back (segment);
6752
+ state->vad_segments .push_back (segment);
6753
6753
6754
6754
// Copy this speech segment
6755
6755
memcpy (filtered_samples.data () + offset, samples + segment_start_samples, segment_length * sizeof (float ));
@@ -6820,6 +6820,24 @@ int whisper_full_with_state(
6820
6820
}
6821
6821
}
6822
6822
6823
+ std::vector<float > vad_samples;
6824
+ if (params.vad )
6825
+ {
6826
+ WHISPER_LOG_INFO (" %s: VAD is enabled, processing speech segments only\n " , __func__);
6827
+ if (!whisper_vad (ctx, state, params, samples, n_samples, vad_samples))
6828
+ {
6829
+ WHISPER_LOG_ERROR (" %s: failed to compute VAD\n " , __func__);
6830
+ return -1 ;
6831
+ }
6832
+ if (vad_samples.empty ())
6833
+ {
6834
+ state->result_all .clear ();
6835
+ return 0 ;
6836
+ }
6837
+ samples = vad_samples.data ();
6838
+ n_samples = vad_samples.size ();
6839
+ }
6840
+
6823
6841
// auto-detect language if not specified
6824
6842
if (params.language == nullptr || strlen (params.language ) == 0 || strcmp (params.language , " auto" ) == 0 || params.detect_language ) {
6825
6843
std::vector<float > probs (whisper_lang_max_id () + 1 , 0 .0f );
@@ -7720,25 +7738,11 @@ int whisper_full_with_state(
7720
7738
}
7721
7739
7722
7740
int whisper_full (
7723
- struct whisper_context * ctx,
7724
- struct whisper_full_params params,
7725
- const float * samples,
7726
- int n_samples) {
7727
-
7728
- std::vector<float > vad_samples;
7729
- if (params.vad ) {
7730
- WHISPER_LOG_INFO (" %s: VAD is enabled, processing speech segments only\n " , __func__);
7731
- if (!whisper_vad (ctx, ctx->state , params, samples, n_samples, vad_samples)) {
7732
- WHISPER_LOG_ERROR (" %s: failed to compute VAD\n " , __func__);
7733
- return -1 ;
7734
- }
7735
- if (vad_samples.empty ()) {
7736
- ctx->state ->result_all .clear ();
7737
- return 0 ;
7738
- }
7739
- samples = vad_samples.data ();
7740
- n_samples = vad_samples.size ();
7741
- }
7741
+ struct whisper_context *ctx,
7742
+ struct whisper_full_params params,
7743
+ const float *samples,
7744
+ int n_samples)
7745
+ {
7742
7746
return whisper_full_with_state (ctx, ctx->state , params, samples, n_samples);
7743
7747
}
7744
7748
@@ -7753,19 +7757,6 @@ int whisper_full_parallel(
7753
7757
return whisper_full (ctx, params, samples, n_samples);
7754
7758
}
7755
7759
7756
- std::vector<float > vad_samples;
7757
- if (params.vad ) {
7758
- WHISPER_LOG_INFO (" %s: VAD is enabled, processing speech segments only\n " , __func__);
7759
- if (!whisper_vad (ctx, ctx->state , params, samples, n_samples, vad_samples)) {
7760
- WHISPER_LOG_ERROR (" %s: failed to compute VAD\n " , __func__);
7761
- return -1 ;
7762
- }
7763
- if (vad_samples.empty ()) {
7764
- return 0 ;
7765
- }
7766
- samples = vad_samples.data ();
7767
- n_samples = vad_samples.size ();
7768
- }
7769
7760
int ret = 0 ;
7770
7761
7771
7762
// prepare separate states for each thread
0 commit comments