-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Add big file recognition demo #1153
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
vadimdddd
wants to merge
19
commits into
alphacep:master
Choose a base branch
from
vadimdddd:big_file
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 3 commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
95aba97
add test_big_file.py
vadimdddd c8774f2
updated algorithm
vadimdddd 91e1df6
add print FinalResult
vadimdddd 5f7538e
fix algorithm
vadimdddd a1b92a5
deleted unused string
vadimdddd 95b78e3
deleted unused code
vadimdddd 9293de9
fix algorithm
vadimdddd 9fd0ce3
add srt
vadimdddd bccd82f
combined methods for srt and txt formats; added logging
vadimdddd 0a42998
models download added
vadimdddd 34535df
pylint fix
vadimdddd c5887cc
added display of available languages
vadimdddd e1507c9
changed imports order
vadimdddd 27eb7a0
add error if no input file
vadimdddd c432667
changed model selection from language to model name
vadimdddd 2234622
add list-model-pairs method in init; add list-models, list-languages,…
vadimdddd 6e17337
deleted result() from big model recognition process, finalresult() le…
vadimdddd 6b18144
srt fix part 1
vadimdddd da420fa
srt fix part 2
vadimdddd File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import os | ||
import re | ||
import subprocess | ||
import json | ||
import shlex | ||
|
||
from pathlib import Path | ||
from vosk import Model, KaldiRecognizer, SetLogLevel | ||
from multiprocessing.dummy import Pool | ||
from queue import Queue | ||
|
||
# Configure the Vosk logging verbosity once, at import time, before any model
# is loaded.  NOTE(review): 0 is presumably the default verbosity and negative
# values silence the log output -- confirm against the Vosk API documentation.
SetLogLevel(0)
|
||
# Sample rate (Hz) that ffmpeg resamples to and that the recognizers are
# created with.
SAMPLE_RATE = 16000

# Base URL of the published Vosk models and the JSON index that lists them.
MODEL_PRE_URL = "https://alphacephei.com/vosk/models/"
MODEL_LIST_URL = MODEL_PRE_URL + "model-list.json"

# Directories searched, in order, for already downloaded models.  The first
# entry is None when VOSK_MODEL_PATH is not set; consumers must skip it.
# The system-wide location has to be absolute: a relative "usr/share/vosk"
# would be resolved against the current working directory.
MODEL_DIRS = [
    os.getenv("VOSK_MODEL_PATH"),
    Path("/usr/share/vosk"),
    Path.home() / "AppData/Local/vosk",
    Path.home() / ".cache/vosk",
]
|
||
def _build_parser():
    """Create the command-line parser for the big-file transcription demo."""
    cli = argparse.ArgumentParser(description="Transcribe big size audiofiles")

    # Language code used to pick the model pair.
    cli.add_argument("--lang", "-l",
                     type=str,
                     default="en-us",
                     help="select both model language")

    # Path of the audio file to transcribe.
    cli.add_argument("--input", "-i",
                     type=str,
                     help="audiofile")

    # Number of pool workers used for the recognition pass.
    cli.add_argument("--cores", "-c",
                     type=int,
                     default=4,
                     help="PC cores used for recognize")
    return cli


parser = _build_parser()
|
||
class HugeFileProcessor:
    """Two-pass transcription of a large audio file.

    Pass 1 runs a small model over the whole file and records the time span
    of every utterance it finds.  Pass 2 re-recognizes each span with the big
    model on a pool of workers and prints the texts in chronological order.

    The class reads the module-level names ``SAMPLE_RATE`` and ``MODEL_DIRS``
    and the global ``big_model_path`` (set by ``main``) for the second pass.
    """

    # Bytes of PCM audio handed to the recognizer per call.
    CHUNK_SIZE = 4000
    # ffmpeg is asked for signed 16-bit mono samples ("s16le", "-ac 1").
    BYTES_PER_SAMPLE = 2

    def __init__(self, args):
        """Store the parsed command-line arguments (``lang``, ``input``, ``cores``)."""
        self.args = args
        # Retained so existing attribute access keeps working; fragments are
        # handed to the workers directly instead of through this queue.
        self.queue = Queue()
        # Big model shared by all workers; loaded once in process().
        self._big_model = None

    @staticmethod
    def _ffmpeg_command(infile, sample_rate):
        """Return the ffmpeg argv that decodes *infile* to raw mono s16le on stdout."""
        # Built as a list so the file name is passed verbatim: no quoting is
        # needed and names containing quotes or spaces cannot break parsing.
        return ["ffmpeg", "-nostdin", "-loglevel", "quiet",
                "-i", str(infile),
                "-ar", str(sample_rate), "-ac", "1", "-f", "s16le", "-"]

    @staticmethod
    def _find_model_pair(directory, lang):
        """Return ``(small_model_dir, big_model_dir)`` found in *directory*, or None.

        Only sub-directories are considered (downloaded archives are not
        loadable models) and names are scanned in sorted order so the choice
        is deterministic.
        """
        escaped = re.escape(lang)
        small_pattern = re.compile(r"vosk-model-small-{}".format(escaped))
        big_pattern = re.compile(r"vosk-model-{}".format(escaped))
        small = big = None
        for name in sorted(os.listdir(directory)):
            candidate = Path(directory, name)
            if not candidate.is_dir():
                continue
            if small is None and small_pattern.match(name):
                small = candidate
            elif big is None and big_pattern.match(name):
                big = candidate
        if small is None or big is None:
            return None
        return small, big

    @staticmethod
    def _fragment_from_result(result):
        """Return ``(start, end)`` seconds spanned by the words of a decoded result.

        *result* is the parsed JSON of a recognizer result; None is returned
        when it carries no word list (silence or an empty utterance).
        """
        words = result.get("result")
        if not words:
            return None
        return float(words[0]["start"]), float(words[-1]["end"])

    @staticmethod
    def _byte_offset(seconds, sample_rate):
        """Convert a time in seconds to a sample-aligned byte offset in the PCM stream."""
        return int(seconds * sample_rate) * HugeFileProcessor.BYTES_PER_SAMPLE

    @staticmethod
    def _skip_bytes(pipe, count, block_size=1 << 16):
        """Read and discard *count* bytes from *pipe* in bounded blocks.

        Returns False when the stream ends before *count* bytes were skipped.
        """
        while count > 0:
            data = pipe.read(min(block_size, count))
            if not data:
                return False
            count -= len(data)
        return True

    def resample_ffmpeg(self, infile):
        """Start ffmpeg decoding *infile* and return the ``subprocess.Popen`` object.

        The raw audio is available on the returned object's ``stdout`` pipe.
        """
        return subprocess.Popen(
            self._ffmpeg_command(infile, SAMPLE_RATE), stdout=subprocess.PIPE)

    def get_model(self):
        """Locate the small and big model for ``self.args.lang``.

        Returns:
            ``(small_model_path, big_model_path)`` from the first directory of
            ``MODEL_DIRS`` that contains both, or None when none does.
        """
        for directory in MODEL_DIRS:
            if directory is None or not Path(directory).is_dir():
                continue
            pair = self._find_model_pair(directory, self.args.lang)
            if pair is not None:
                return pair
        return None

    def process_by_small_model(self, small_model_path):
        """Run the small model over the whole input and return utterance spans.

        Returns:
            A list of ``(start, end)`` tuples in seconds, one per utterance
            that contained at least one word, in chronological order.
        """
        model = Model(model_path=str(small_model_path))
        rec = KaldiRecognizer(model, SAMPLE_RATE)
        # Word timings are taken from the finished results, so they must be
        # enabled on the recognizer.
        rec.SetWords(True)
        fragments = []

        def collect(raw_json):
            fragment = self._fragment_from_result(json.loads(raw_json))
            if fragment is not None:
                fragments.append(fragment)

        with self.resample_ffmpeg(self.args.input) as stream:
            while True:
                data = stream.stdout.read(self.CHUNK_SIZE)
                if not data:
                    break
                if rec.AcceptWaveform(data):
                    collect(rec.Result())
        # The audio still buffered in the recognizer at end of stream is only
        # reported by the final result; dropping it loses the last utterance.
        collect(rec.FinalResult())
        return fragments

    def _transcribe_fragment(self, fragment):
        """Recognize one ``(start, end)`` span with the big model and return its text."""
        start, end = fragment
        start_pos = self._byte_offset(start, SAMPLE_RATE)
        end_pos = self._byte_offset(end, SAMPLE_RATE)

        model = getattr(self, "_big_model", None)
        if model is None:
            # Direct call without process(): load the model on demand.
            model = Model(model_path=str(big_model_path))
        rec = KaldiRecognizer(model, SAMPLE_RATE)
        pieces = []

        with self.resample_ffmpeg(self.args.input) as stream:
            try:
                if self._skip_bytes(stream.stdout, start_pos):
                    remaining = end_pos - start_pos
                    while remaining > 0:
                        data = stream.stdout.read(
                            min(self.CHUNK_SIZE, remaining))
                        if not data:
                            break
                        remaining -= len(data)
                        if rec.AcceptWaveform(data):
                            pieces.append(json.loads(rec.Result())["text"])
            finally:
                # Reading stops before ffmpeg reaches the end of the file, so
                # stop it explicitly; leaving the with-block then closes the
                # pipe and reaps the process.
                stream.kill()
        pieces.append(json.loads(rec.FinalResult())["text"])
        return " ".join(piece for piece in pieces if piece)

    def process_by_big_model(self, timestamps):
        """Recognize one fragment with the big model and print its text.

        Args:
            timestamps: a single ``(start, end)`` pair in seconds, as produced
                by ``process_by_small_model``.
        """
        print(self._transcribe_fragment(timestamps))

    def process(self, small_model_path):
        """Run both passes and print the transcription in chronological order."""
        timestamps = self.process_by_small_model(small_model_path)
        if not timestamps:
            return

        # Load the big model once and share it between the workers instead of
        # loading it again for every fragment.
        self._big_model = Model(model_path=str(big_model_path))

        with Pool(self.args.cores) as pool:
            # imap yields results in submission order, so the output follows
            # the audio even though fragments finish out of order.
            for text in pool.imap(self._transcribe_fragment, timestamps):
                if text:
                    print(text)
|
||
def main():
    """Parse the command line, locate the model pair and transcribe the input.

    Exits with a usage error when no input file is given and with an
    explanatory message when no small/big model pair is installed for the
    requested language.
    """
    # HugeFileProcessor reads the big model location from this module-level
    # name during the second recognition pass.
    global big_model_path

    args = parser.parse_args()
    if not args.input:
        # Without this check ffmpeg would be started on the string "None".
        parser.error("no input file given, use --input/-i")

    processor = HugeFileProcessor(args)
    models = processor.get_model()
    if models is None:
        raise SystemExit(
            "No small/big model pair for language '{}' was found in the "
            "model directories".format(args.lang))
    small_model_path, big_model_path = models

    processor.process(small_model_path)


if __name__ == "__main__":
    main()
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is a bad idea to ignore the recognizer's final result here.