Skip to content

Commit 1dee908

Browse files
committed
support qwen-1.8b
1 parent ccc1461 commit 1dee908

File tree

10 files changed

+115
-19
lines changed

10 files changed

+115
-19
lines changed

.github/workflows/model-test.yml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
name: model-test
2+
on:
3+
push:
4+
branches:
5+
- master
6+
- 'feature/**'
7+
paths:
8+
- 'src/**'
9+
- '.github/workflows/model-test.yml'
10+
pull_request:
11+
branches: [master]
12+
paths:
13+
- 'src/**'
14+
- '.github/workflows/model-test.yml'
15+
16+
jobs:
17+
llm-build:
18+
name: ${{ matrix.os }}-build
19+
env:
20+
PACKAGE_DIR: ${{ matrix.os }}-package
21+
PACKAGE_FILE: ${{ matrix.os }}-package.zip
22+
runs-on: ${{ matrix.os }}
23+
strategy:
24+
matrix:
25+
os: [ubuntu-latest, macos-latest, windows-latest]
26+
27+
steps:
28+
- uses: actions/checkout@v3
29+
# linux and macos
30+
- name: linux-macos-build-pack
31+
if: matrix.os != 'windows-latest'
32+
run: |
33+
./script/build.sh
34+
./script/package.sh $PACKAGE_DIR
35+
zip -r $PACKAGE_FILE $PACKAGE_DIR
36+
# windows
37+
- name: windows-build-pack
38+
if: matrix.os == 'windows-latest'
39+
run: |
40+
.\script\build.ps1
41+
.\script\package.ps1 windows-package
42+
7z a -r windows-package.zip windows-package
43+
# upload
44+
- name: upload-zip
45+
uses: actions/upload-artifact@v3
46+
with:
47+
path: ./*.zip
48+
49+
model-test:
50+
needs: llm-build
51+
name: ${{ matrix.model }}-${{ matrix.os }}-test
52+
runs-on: ${{ matrix.os }}
53+
env:
54+
PACKAGE_DIR: ${{ matrix.os }}-package
55+
PACKAGE_FILE: ${{ matrix.os }}-package.zip
56+
strategy:
57+
matrix:
58+
os: [ubuntu-latest, macos-latest, windows-latest]
59+
model: [qwen-1.8b, chatglm-6b, chatglm2-6b, chatglm3-6b, codegeex2-6b, qwen-7b-chat, baichuan2-7b-chat, llama2-7b-chat]
60+
61+
steps:
62+
- uses: actions/download-artifact@v3
63+
with:
64+
name: artifact
65+
path: workspace
66+
- name: linux-macos-test
67+
if: matrix.os != 'windows-latest'
68+
run: |
69+
cd workspace
70+
unzip $PACKAGE_FILE
71+
cd $PACKAGE_DIR
72+
./script/model_test.sh ${{ matrix.model }}
73+
- name: windows-test
74+
if: matrix.os == 'windows-latest'
75+
run: |
76+
cd workspace
77+
7z x windows-package.zip
78+
cd windows-package
79+
./script/model_test.ps1 ${{ matrix.model }}

.github/workflows/linux.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
runs-on: ubuntu-latest
3939
strategy:
4040
matrix:
41-
model: [chatglm-6b, chatglm2-6b, chatglm3-6b, codegeex2-6b, qwen-7b-chat, baichuan2-7b-chat, llama2-7b-chat]
41+
model: [qwen-1.8b, chatglm-6b, chatglm2-6b, chatglm3-6b, codegeex2-6b, qwen-7b-chat, baichuan2-7b-chat, llama2-7b-chat]
4242

4343
steps:
4444
- uses: actions/download-artifact@v3

.github/workflows/macos.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
runs-on: macos-latest
3939
strategy:
4040
matrix:
41-
model: [chatglm-6b, chatglm2-6b, chatglm3-6b, codegeex2-6b, qwen-7b-chat, baichuan2-7b-chat, llama2-7b-chat]
41+
model: [qwen-1.8b, chatglm-6b, chatglm2-6b, chatglm3-6b, codegeex2-6b, qwen-7b-chat, baichuan2-7b-chat, llama2-7b-chat]
4242

4343
steps:
4444
- uses: actions/download-artifact@v3

.github/workflows/windows.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
runs-on: windows-latest
3939
strategy:
4040
matrix:
41-
model: [chatglm-6b, chatglm2-6b, chatglm3-6b, codegeex2-6b, qwen-7b-chat, baichuan2-7b-chat, llama2-7b-chat]
41+
model: [qwen-1.8b, chatglm-6b, chatglm2-6b, chatglm3-6b, codegeex2-6b, qwen-7b-chat, baichuan2-7b-chat, llama2-7b-chat]
4242

4343
steps:
4444
- uses: actions/download-artifact@v3
@@ -54,4 +54,4 @@ jobs:
5454
./script/download_model.ps1 ${{ matrix.model }}
5555
cd build
5656
.\Release\cli_demo ..\${{ matrix.model }} prompt.txt
57-
Exit 0
57+
Exit 0

README.md

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ llm模型导出onnx模型请使用[llm-export](https://github.com/wangzhaode/llm
2121
| Qwen-7B-Chat | [![Download][download-qwen-7b-chat-onnx]][release-qwen-7b-chat-onnx] | [![Download][download-qwen-7b-chat-mnn]][release-qwen-7b-chat-mnn] |
2222
| Baichuan2-7B-Chat | [![Download][download-baichuan2-7b-chat-onnx]][release-baichuan2-7b-chat-onnx] | [![Download][download-baichuan2-7b-chat-mnn]][release-baichuan2-7b-chat-mnn] |
2323
| Llama-2-7b-chat | [![Download][download-llama2-7b-chat-onnx]][release-llama2-7b-chat-onnx] | [![Download][download-llama2-7b-chat-mnn]][release-llama2-7b-chat-mnn] |
24+
| Qwen-1_8B-Chat | [![Download][download-qwen-1.8b-onnx]][release-qwen-1.8b-onnx] | [![Download][download-qwen-1.8b-mnn]][release-qwen-1.8b-mnn] |
25+
26+
其他版本:
27+
- Qwen-1_8B-Chat-int8:[![Download][download-qwen-1.8b-mnn-int8]][release-qwen-1.8b-mnn-int8]
2428

2529
[download-chatglm-6b-onnx]: https://img.shields.io/github/downloads/wangzhaode/llm-export/chatglm-6b-onnx/total
2630
[download-chatglm2-6b-onnx]: https://img.shields.io/github/downloads/wangzhaode/llm-export/chatglm2-6b-onnx/total
@@ -29,30 +33,38 @@ llm模型导出onnx模型请使用[llm-export](https://github.com/wangzhaode/llm
2933
[download-qwen-7b-chat-onnx]: https://img.shields.io/github/downloads/wangzhaode/llm-export/qwen-7b-chat-onnx/total
3034
[download-baichuan2-7b-chat-onnx]: https://img.shields.io/github/downloads/wangzhaode/llm-export/baichuan2-7b-chat-onnx/total
3135
[download-llama2-7b-chat-onnx]: https://img.shields.io/github/downloads/wangzhaode/llm-export/llama2-7b-chat-onnx/total
36+
[download-qwen-1.8b-onnx]: https://img.shields.io/github/downloads/wangzhaode/llm-export/qwen-1.8b-onnx/total
3237
[release-chatglm-6b-onnx]: https://github.com/wangzhaode/llm-export/releases/tag/chatglm-6b-onnx
3338
[release-chatglm2-6b-onnx]: https://github.com/wangzhaode/llm-export/releases/tag/chatglm2-6b-onnx
3439
[release-chatglm3-6b-onnx]: https://github.com/wangzhaode/llm-export/releases/tag/chatglm3-6b-onnx
3540
[release-codegeex2-6b-onnx]: https://github.com/wangzhaode/llm-export/releases/tag/codegeex2-6b-onnx
3641
[release-qwen-7b-chat-onnx]: https://github.com/wangzhaode/llm-export/releases/tag/qwen-7b-chat-onnx
3742
[release-baichuan2-7b-chat-onnx]: https://github.com/wangzhaode/llm-export/releases/tag/baichuan2-7b-chat-onnx
3843
[release-llama2-7b-chat-onnx]: https://github.com/wangzhaode/llm-export/releases/tag/llama2-7b-chat-onnx
44+
[release-qwen-1.8b-onnx]: https://github.com/wangzhaode/llm-export/releases/tag/qwen-1.8b-onnx
3945
[download-chatglm-6b-mnn]: https://img.shields.io/github/downloads/wangzhaode/mnn-llm/chatglm-6b-mnn/total
4046
[download-chatglm2-6b-mnn]: https://img.shields.io/github/downloads/wangzhaode/mnn-llm/chatglm2-6b-mnn/total
4147
[download-chatglm3-6b-mnn]: https://img.shields.io/github/downloads/wangzhaode/mnn-llm/chatglm3-6b-mnn/total
4248
[download-codegeex2-6b-mnn]: https://img.shields.io/github/downloads/wangzhaode/mnn-llm/codegeex2-6b-mnn/total
4349
[download-qwen-7b-chat-mnn]: https://img.shields.io/github/downloads/wangzhaode/mnn-llm/qwen-7b-chat-mnn/total
4450
[download-baichuan2-7b-chat-mnn]: https://img.shields.io/github/downloads/wangzhaode/mnn-llm/baichuan2-7b-chat-mnn/total
4551
[download-llama2-7b-chat-mnn]: https://img.shields.io/github/downloads/wangzhaode/mnn-llm/llama2-7b-chat-mnn/total
52+
[download-qwen-1.8b-mnn]: https://img.shields.io/github/downloads/wangzhaode/mnn-llm/qwen-1.8b-mnn/total
53+
[download-qwen-1.8b-mnn-int8]: https://img.shields.io/github/downloads/wangzhaode/mnn-llm/qwen-1.8b-mnn-int8/total
4654
[release-chatglm-6b-mnn]: https://github.com/wangzhaode/mnn-llm/releases/tag/chatglm-6b-mnn
4755
[release-chatglm2-6b-mnn]: https://github.com/wangzhaode/mnn-llm/releases/tag/chatglm2-6b-mnn
4856
[release-chatglm3-6b-mnn]: https://github.com/wangzhaode/mnn-llm/releases/tag/chatglm3-6b-mnn
4957
[release-codegeex2-6b-mnn]: https://github.com/wangzhaode/mnn-llm/releases/tag/codegeex2-6b-mnn
5058
[release-qwen-7b-chat-mnn]: https://github.com/wangzhaode/mnn-llm/releases/tag/qwen-7b-chat-mnn
5159
[release-baichuan2-7b-chat-mnn]: https://github.com/wangzhaode/mnn-llm/releases/tag/baichuan2-7b-chat-mnn
5260
[release-llama2-7b-chat-mnn]: https://github.com/wangzhaode/mnn-llm/releases/tag/llama2-7b-chat-mnn
61+
[release-qwen-1.8b-mnn]: https://github.com/wangzhaode/mnn-llm/releases/tag/qwen-1.8b-mnn
62+
[release-qwen-1.8b-mnn-int8]: https://github.com/wangzhaode/mnn-llm/releases/tag/qwen-1.8b-mnn-int8
5363

5464
### 速度
5565

66+
#### CPU 4线程速度: `prefill / decode` `tok/s`
67+
5668
| model | android(f16/32)| macos (f32) | linux (f32) | windows (f32) |
5769
|:-----------------:|:--------------:|:-------------:|:--------------:|:--------------:|
5870
| qwen-1.8b-int4 | 100.21 / 22.22 | 84.85 / 19.93 | 151.00 / 35.89 | 117.30 / 33.40 |
@@ -64,19 +76,16 @@ llm模型导出onnx模型请使用[llm-export](https://github.com/wangzhaode/llm
6476
| baichuan2-7b-int4 | 13.87 / 6.08 | 17.21 / 6.10 | 30.11 / 10.87 | 26.31 / 9.84 |
6577
| llama-2-7b-int4 | 17.98 / 5.17 | 19.72 / 5.06 | 34.47 / 9.29 | 28.66 / 8.90 |
6678

67-
- android
68-
- 测试设备: XiaoMi12
69-
- 处理器: Snapdragon 8gen1
70-
- 内存大小: 8 GB
71-
- macos
72-
- 测试设备: MacBook Pro 2019
73-
- 处理器: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
74-
- 内存大小: 16 GB
75-
- linux(wsl)/windows
76-
- 测试设备: PC
77-
- 处理器: Intel(R) Core(TM) i7-13700K @ 3.40 GHz
78-
- 内存大小: 32 GB
79-
- CPU 4线程速度: prefill / decode `tok/s`
79+
测试的系统和设备信息如下,
80+
81+
| os | device | CPU | Memory |
82+
|:--:|:-------:|:----:|:--------:|
83+
| android | XiaoMi12 | Snapdragon 8gen1 | 8 GB |
84+
| macos | MacBook Pro 2019 | Intel(R) Core(TM) i7-9750H CPU | 16 GB |
85+
| linux | PC | Intel(R) Core(TM) i7-13700K | 32 GB |
86+
| windows | PC | Intel(R) Core(TM) i7-13700K | 32 GB |
87+
88+
8089

8190

8291
### 下载int4模型

script/download_model.ps1

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ $block_num = 28
77
if ($model.Contains('7b')) {
88
$block_num = 32
99
}
10+
if ($model.Contains('1.8b')) {
11+
$block_num = 24
12+
}
1013
Invoke-WebRequest -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/tokenizer.txt -OutFile tokenizer.txt
1114
Invoke-WebRequest -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/embedding.mnn -OutFile embedding.mnn
1215
Invoke-WebRequest -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/lm.mnn -OutFile lm.mnn

script/download_model.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,14 @@ model=$1
77
mkdir $model
88
cd $model
99
is_7b=`echo $model | grep '7b'`
10+
is_1_8b=`echo $model | grep '1\.8b'`
1011
block_num=27
1112
if [ $is_7b ]; then
1213
block_num=31
1314
fi
15+
if [ $is_1_8b ]; then
16+
block_num=23
17+
fi
1418
# download models
1519
wget -c -nv https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/tokenizer.txt
1620
wget -c -nv https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/embedding.mnn

script/model_test.ps1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ function model_test($model) {
66
Write-Output "test model : ${model}"
77
powershell .\script\download_model.ps1 ${model}
88
cd build
9-
.\Release\cli_demo -m ..\${model}
9+
.\Release\cli_demo ..\${model} prompt.txt
1010
cd ..
1111
}
1212

script/model_test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ model_test() {
1111
}
1212

1313
test_all() {
14+
model_test qwen-1.8b
1415
model_test chatglm-6b
1516
model_test chatglm2-6b
1617
model_test chatglm3-6b

src/llm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ void Llm::load(const std::string& model_dir) {
143143
ScheduleConfig config;
144144
BackendConfig cpuBackendConfig;
145145
config.type = MNN_FORWARD_CPU;
146-
config.type = MNN_FORWARD_OPENCL;
146+
// config.type = MNN_FORWARD_OPENCL;
147147
config.numThread = 4;
148148
cpuBackendConfig.precision = BackendConfig::Precision_Low;
149149
cpuBackendConfig.memory = BackendConfig::Memory_Low;

0 commit comments

Comments
 (0)