Skip to content

Commit c415092

Browse files
authored
Upload RKNN models for sense-voice (#2592)
1 parent c691318 commit c415092

File tree

3 files changed

+215
-4
lines changed

3 files changed

+215
-4
lines changed
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
# Workflow header: name, triggers, and concurrency policy.
name: export-sense-voice-to-rknn

on:
  # Triggered by pushes to the dedicated CI branch ...
  push:
    branches: [export-sense-voice-rknn-ci-2]
  # ... or started manually.
  workflow_dispatch:

concurrency:
  # Runs are grouped per ref; a newer run in the same group cancels the one
  # that is still in progress.
  group: export-sense-voice-to-rknn-${{ github.ref }}
  cancel-in-progress: true
12+
13+
jobs:
  # For every (framework, platform, input length) combination in the matrix:
  # export the SenseVoice checkpoint to ONNX, test the ONNX model, convert it
  # to RKNN, pack the result into a .tar.bz2 and upload it to the
  # `asr-models` release tag.
  export-sense-voice-to-rknn:
    # Run only when the repository owner is k2-fsa or csukuangfj.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.framework }} ${{ matrix.platform }} ${{ matrix.input_in_seconds }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # Quoted so that 3.10 is not parsed as the float 3.1.
        python-version: ["3.10"]
        platform: ["rk3562", "rk3566", "rk3568", "rk3576", "rk3588"]
        input_in_seconds: ["10", "15", "20", "25", "30"]
        framework: ["FunASR", "WSYue-ASR"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade \
            pip \
            "numpy<2" \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            onnx==1.17.0 \
            onnxruntime==1.17.1 \
            librosa \
            soundfile \
            onnxsim \
            sentencepiece \
            kaldi_native_fbank

          # The rknn-toolkit2 wheel is a cp310 build, matching the Python
          # version selected in the matrix.
          curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl
          pip install ./*.whl "numpy<=1.26.4"

      - name: Run SenseVoice from FunAsr
        if: matrix.framework == 'FunASR'
        shell: bash
        run: |
          cd scripts/sense-voice/rknn

          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/model.pt
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ja.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ko.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          # Drop any README.md already present in this directory so that the
          # downloaded one is the file that gets packaged.
          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/README.md
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/LICENSE

          echo "export to onnx"
          t=${{ matrix.input_in_seconds }}
          p=${{ matrix.platform }}

          echo "----$t---"
          python3 ./export-onnx.py --input-len-in-seconds $t

          ls -lh *.onnx

          echo "test exported onnx models"

          echo "----------$t----------"
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./en.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./ja.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./ko.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./zh.wav

          echo "export to rknn"
          echo "----------$t----------"
          echo "----------$p----------"
          # Keep the converter output out of the job log on success, but save
          # it so that a failed conversion can still be diagnosed.
          if ! python3 export-rknn.py --target-platform $p --in-model model-$t-seconds.onnx --out-model model-$p-$t-seconds.rknn >export-rknn.log 2>&1; then
            echo "export-rknn.py failed; last lines of its output:"
            tail -n 100 export-rknn.log
            exit 1
          fi

          ls -lh *.rknn

          echo "collect results"
          d=sherpa-onnx-$p-$t-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v LICENSE $d
          cp -v model-$p-$t-seconds.rknn $d/model.rknn
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          # Remove the staging directory now that it has been archived.
          rm -rf "$d"

          echo "----show---"
          ls -lh *.tar.bz2

          # Move the archive to the repository root, where the release steps
          # look for ./*.tar.bz2.
          mv *.tar.bz2 ../../..

      - name: Run SenseVoice from WSYue-ASR
        if: matrix.framework == 'WSYue-ASR'
        shell: bash
        run: |
          cd scripts/sense-voice/rknn

          curl -SL -O https://huggingface.co/ASLP-lab/WSYue-ASR/resolve/main/sensevoice_small_yue/model.pt

          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          for i in $(seq 0 17); do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/test_wavs/yue-$i.wav
          done

          # Drop any README.md already present in this directory so that the
          # downloaded one is the file that gets packaged.
          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/README.md

          echo "export to onnx"
          t=${{ matrix.input_in_seconds }}
          p=${{ matrix.platform }}

          echo "----$t---"

          # Exported into the environment before running export-onnx.py.
          # NOTE(review): presumably read by export-onnx.py as model
          # metadata; confirm in that script.
          export model_author="ASLP-lab"
          export comment="ASLP-lab/WSYue-ASR"
          export url="https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/sensevoice_small_yue"

          python3 ./export-onnx.py --input-len-in-seconds $t

          ls -lh *.onnx

          echo "test exported onnx models"

          echo "----------$t----------"
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./en.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./zh.wav
          for i in $(seq 0 17); do
            echo "yue-$i.wav"
            python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue-$i.wav
          done

          echo "export to rknn"
          echo "----------$t----------"
          echo "----------$p----------"
          # Keep the converter output out of the job log on success, but save
          # it so that a failed conversion can still be diagnosed.
          if ! python3 export-rknn.py --target-platform $p --in-model model-$t-seconds.onnx --out-model model-$p-$t-seconds.rknn >export-rknn.log 2>&1; then
            echo "export-rknn.py failed; last lines of its output:"
            tail -n 100 export-rknn.log
            exit 1
          fi

          ls -lh *.rknn

          echo "collect results"
          d=sherpa-onnx-$p-$t-seconds-sense-voice-zh-en-ja-ko-yue-2025-09-09

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v model-$p-$t-seconds.rknn $d/model.rknn
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          # Remove the staging directory now that it has been archived.
          rm -rf "$d"

          echo "----show---"
          ls -lh *.tar.bz2

          # Move the archive to the repository root, where the release steps
          # look for ./*.tar.bz2.
          mv *.tar.bz2 ../../..

      # When the owner is csukuangfj, upload to k2-fsa/sherpa-onnx using the
      # dedicated token secret.
      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      # When the owner is k2-fsa, no repo_name/repo_token is given, so the
      # action's defaults apply.
      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models

scripts/sense-voice/rknn/export-onnx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def main():
8686

8787
print("loading model")
8888

89-
state_dict = torch.load("./model.pt")
89+
state_dict = torch.load("./model.pt", map_location="cpu")
9090
if "state_dict" in state_dict:
9191
state_dict = state_dict["state_dict"]
9292

scripts/sense-voice/rknn/torch_model.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@
88

99

1010
class SinusoidalPositionEncoder(nn.Module):
11-
""" """
12-
1311
def __init__(self, d_model=80, dropout_rate=0.1):
14-
pass
12+
super().__init__()
1513

1614
def encode(
1715
self,

0 commit comments

Comments
 (0)