Commit b689cbc

[ci] add pytorch kvint testcase into function regression (#2584)
* update * update * update * update * update * update * update * update * update * update * update * update * update * updata * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * Update config.yaml * update * update kvint testcase for vl model * update * update * update * updaste * update * update * update
1 parent ba3701b commit b689cbc

11 files changed (+560, -175 lines)
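The diffs below wire kv-cache quantization ("kvint") coverage into the PyTorch-backend regression suite: new no_kvint4 / no_kvint8 exclusion lists in autotest/config.yaml, base-model CLI chat tests, and kvint4/kvint8 pipeline tests. For context, a minimal sketch of what a kvint run exercises, assuming a recent lmdeploy release where PytorchEngineConfig exposes quant_policy (4 = kv int4, 8 = kv int8); this snippet is illustrative only and is not part of the commit:

# Illustrative only (not part of this commit). Assumes lmdeploy >= 0.6.x,
# where the PyTorch backend config accepts quant_policy for the kv cache.
from lmdeploy import GenerationConfig, PytorchEngineConfig, pipeline

pipe = pipeline('internlm/internlm2_5-7b-chat',
                backend_config=PytorchEngineConfig(tp=1, quant_policy=8))
print(pipe(['Hi, please introduce yourself'],
           gen_config=GenerationConfig(max_new_tokens=64)))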

autotest/config.yaml

Lines changed: 21 additions & 62 deletions
@@ -57,10 +57,13 @@ turbomind_chat_model:
   - deepseek-ai/deepseek-coder-1.3b-instruct
   - codellama/CodeLlama-7b-Instruct-hf
   - THUDM/glm-4-9b-chat
+  - openbmb/MiniCPM-Llama3-V-2_5
+  - openbmb/MiniCPM-V-2_6

 pytorch_chat_model:
   - meta-llama/Meta-Llama-3-8B-Instruct
   - meta-llama/Meta-Llama-3-1-8B-Instruct
+  - meta-llama/Llama-3.2-1B-Instruct
   - meta-llama/Llama-2-7b-chat-hf
   - internlm/internlm2_5-7b-chat
   - internlm/internlm2_5-20b-chat
@@ -71,6 +74,7 @@ pytorch_chat_model:
   - OpenGVLab/InternVL2-8B
   - OpenGVLab/InternVL2-26B
   - OpenGVLab/InternVL2-40B
+  - OpenGVLab/InternVL-Chat-V1-5
   - baichuan-inc/Baichuan2-7B-Chat
   - baichuan-inc/Baichuan2-13B-Chat
   - 01-ai/Yi-6B-Chat
@@ -94,9 +98,9 @@ pytorch_chat_model:
   - THUDM/cogvlm2-llama3-chinese-chat-19B
   - THUDM/glm-4v-9b
   - THUDM/glm-4-9b-chat
+  - THUDM/cogvlm-chat-hf
   - microsoft/Phi-3-mini-4k-instruct
   - microsoft/Phi-3-vision-128k-instruct
-  - bigcode/starcoder2-7b

 turbomind_base_model:
   - internlm/internlm2_5-7b
@@ -109,6 +113,7 @@ pytorch_base_model:
   - internlm/internlm2_5-7b
   - internlm/internlm2_5-1_8b
   - internlm/internlm2_5-20b
+  - bigcode/starcoder2-7b

 vl_model:
   - Qwen/Qwen-VL-Chat
@@ -125,81 +130,27 @@ vl_model:
   - OpenGVLab/InternVL2-40B
   - Qwen/Qwen2-VL-2B-Instruct
   - Qwen/Qwen2-VL-7B-Instruct
-  - internlm/internlm-xcomposer2-vl-7b
   - internlm/internlm-xcomposer2d5-7b
   - internlm/internlm-xcomposer2-4khd-7b
   - THUDM/cogvlm-chat-hf
   - THUDM/cogvlm2-llama3-chinese-chat-19B
   - THUDM/glm-4v-9b
+  - microsoft/Phi-3-mini-4k-instruct
   - microsoft/Phi-3-vision-128k-instruct
   - openbmb/MiniCPM-Llama3-V-2_5
   - openbmb/MiniCPM-V-2_6

 turbomind_quatization:
-  awq:
-    - meta-llama/Meta-Llama-3-1-8B-Instruct
-    - meta-llama/Meta-Llama-3-8B-Instruct
-    - meta-llama/Llama-2-7b-chat-hf
-    - internlm/internlm2_5-7b-chat
-    - internlm/internlm2_5-7b
-    - internlm/internlm2_5-20b-chat
-    - internlm/internlm2-chat-20b
-    - internlm/internlm2_5-20b
-    - internlm/internlm-chat-20b
-    - internlm/internlm-xcomposer2-4khd-7b
-    - internlm/internlm-xcomposer2d5-7b
-    - OpenGVLab/InternVL-Chat-V1-5
-    - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
-    - OpenGVLab/InternVL2-2B
-    - OpenGVLab/InternVL2-8B
-    - OpenGVLab/InternVL2-26B
-    - OpenGVLab/InternVL2-40B
-    - Qwen/Qwen1.5-7B-Chat
-    - Qwen/Qwen2-7B-Instruct
-    - Qwen/Qwen2-1.5B-Instruct
-    - Qwen/Qwen2.5-7B-Instruct
-    - Qwen/Qwen-VL-Chat
-    - liuhaotian/llava-v1.5-13b
-    - liuhaotian/llava-v1.6-vicuna-7b
-    - 01-ai/Yi-VL-6B
-    - 01-ai/Yi-6B-Chat
-    - deepseek-ai/deepseek-vl-1.3b-chat
-    - baichuan-inc/Baichuan2-7B-Chat
-    - codellama/CodeLlama-7b-hf
-    - openbmb/MiniCPM-Llama3-V-2_5
-    - THUDM/glm-4-9b-chat
-  gptq:
-    - internlm/internlm2_5-7b-chat
-  kvint:
-    - meta-llama/Meta-Llama-3-1-8B-Instruct
-    - meta-llama/Meta-Llama-3-8B-Instruct
-    - meta-llama/Llama-2-7b-chat-hf
-    - internlm/internlm2_5-7b-chat
-    - internlm/internlm2_5-20b-chat
-    - internlm/internlm2-chat-20b
-    - internlm/internlm2-chat-20b-4bits
-    - internlm/internlm-chat-20b
-    - internlm/internlm-xcomposer2-4khd-7b
-    - internlm/internlm-xcomposer2d5-7b
-    - OpenGVLab/InternVL-Chat-V1-5
-    - Qwen/Qwen2-7B-Instruct
-    - Qwen/Qwen2-7B-Instruct-AWQ
-    - Qwen/Qwen2-1.5B-Instruct
-    - Qwen/Qwen1.5-7B-Chat
-    - Qwen/Qwen1.5-4B-Chat-AWQ
-    - Qwen/Qwen-VL-Chat
+  no_awq:
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
+    - mistralai/Mistral-7B-Instruct-v0.1
     - mistralai/Mistral-7B-Instruct-v0.2
     - mistralai/Mistral-7B-Instruct-v0.3
-    - lmdeploy/llama2-chat-7b-w4
-    - baichuan-inc/Baichuan2-7B-Chat
-    - 01-ai/Yi-6B-Chat
-    - 01-ai/Yi-VL-6B
-    - liuhaotian/llava-v1.5-13b
-    - liuhaotian/llava-v1.6-vicuna-7b
-    - deepseek-ai/deepseek-vl-1.3b-chat
     - deepseek-ai/deepseek-coder-1.3b-instruct
     - codellama/CodeLlama-7b-Instruct-hf
-    - THUDM/glm-4-9b-chat
+  gptq:
+    - internlm/internlm2_5-7b-chat

 pytorch_quatization:
   awq:
@@ -211,6 +162,7 @@ pytorch_quatization:
     - internlm/internlm2-chat-20b
     - OpenGVLab/InternVL-Chat-V1-5
     - 01-ai/Yi-6B-Chat
+    - Qwen/Qwen1.5-7B-Chat
     - Qwen/Qwen2-7B-Instruct
     - Qwen/Qwen2-1.5B-Instruct
     - microsoft/Phi-3-mini-4k-instruct
@@ -223,6 +175,13 @@ pytorch_quatization:
     - 01-ai/Yi-6B-Chat
     - internlm/internlm2_5-20b
     - internlm/internlm2_5-7b
+  no_kvint4:
+    - OpenGVLab/InternVL2-4B
+    - deepseek-ai/DeepSeek-V2-Lite-Chat
+    - microsoft/Phi-3-mini-4k-instruct
+    - microsoft/Phi-3-vision-128k-instruct
+  no_kvint8:
+    - deepseek-ai/DeepSeek-V2-Lite-Chat


 longtext_model:
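The new no_kvint4 / no_kvint8 keys mirror the no_awq list: they name models to skip for a given kv-cache quant_policy. A hedged sketch of how such an exclusion list could be applied when building the parametrized model list; the real filtering is done by get_torch_model_list in the autotest utils and may differ:

# Hypothetical sketch only; the actual helper lives in the autotest utils
# and may read additional config sections.
import yaml

def pytorch_models_for_quant_policy(config_path, quant_policy):
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    models = list(cfg.get('pytorch_chat_model', []))
    skip_key = {4: 'no_kvint4', 8: 'no_kvint8'}.get(quant_policy)
    skipped = set(cfg.get('pytorch_quatization', {}).get(skip_key, []))
    return [m for m in models if m not in skipped]

# e.g. drops DeepSeek-V2-Lite-Chat and the Phi-3 models from kvint4 runs
kvint4_models = pytorch_models_for_quant_policy('autotest/config.yaml', 4)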

autotest/tools/chat/test_command_chat_hf_pytorch.py

Lines changed: 48 additions & 0 deletions
@@ -51,6 +51,54 @@ def test_hf_pytorch_chat_tp2(config, model, cli_case_config, worker_id):
     assert result, msg


+@pytest.mark.order(10)
+@pytest.mark.usefixtures('cli_case_config')
+@pytest.mark.hf_turbomind_chat
+@pytest.mark.gpu_num_1
+@pytest.mark.parametrize('model',
+                         get_torch_model_list(tp_num=1,
+                                              model_type='base_model'))
+def test_hf_pytorch_base_tp1(config, model, cli_case_config, worker_id):
+    usercase = 'base_testcase'
+    result, chat_log, msg = hf_command_line_test(
+        config,
+        usercase,
+        cli_case_config.get(usercase),
+        model,
+        'pytorch',
+        cuda_prefix=get_cuda_prefix_by_workerid(worker_id))
+
+    if chat_log is not None:
+        allure.attach.file(chat_log,
+                           attachment_type=allure.attachment_type.TEXT)
+
+    assert result, msg
+
+
+@pytest.mark.order(10)
+@pytest.mark.usefixtures('cli_case_config')
+@pytest.mark.hf_turbomind_chat
+@pytest.mark.gpu_num_2
+@pytest.mark.parametrize('model',
+                         get_torch_model_list(tp_num=2,
+                                              model_type='base_model'))
+def test_hf_pytorch_base_tp2(config, model, cli_case_config, worker_id):
+    usercase = 'base_testcase'
+    result, chat_log, msg = hf_command_line_test(
+        config,
+        usercase,
+        cli_case_config.get(usercase),
+        model,
+        'pytorch',
+        cuda_prefix=get_cuda_prefix_by_workerid(worker_id, tp_num=2))
+
+    if chat_log is not None:
+        allure.attach.file(chat_log,
+                           attachment_type=allure.attachment_type.TEXT)
+
+    assert result, msg
+
+
 @pytest.mark.order(10)
 @pytest.mark.usefixtures('cli_case_config')
 @pytest.mark.hf_pytorch_chat
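The new base-model tests reuse get_cuda_prefix_by_workerid so that pytest-xdist workers (gw0, gw1, ...) land on disjoint GPUs. A purely hypothetical illustration of such a mapping, assuming one contiguous block of devices per worker; the actual helper is imported from the autotest utils and may allocate devices differently:

# Hypothetical mapping only; not the real get_cuda_prefix_by_workerid.
def cuda_prefix_for_worker(worker_id: str, tp_num: int = 1) -> str:
    if 'gw' not in worker_id:  # master / non-xdist run: no prefix needed
        return ''
    first = int(worker_id.replace('gw', '')) * tp_num
    devices = ','.join(str(first + i) for i in range(tp_num))
    return 'CUDA_VISIBLE_DEVICES=' + devices

assert cuda_prefix_for_worker('gw1', tp_num=2) == 'CUDA_VISIBLE_DEVICES=2,3'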

autotest/tools/pipeline/test_pipeline_chat_pytorch.py

Lines changed: 104 additions & 9 deletions
@@ -7,19 +7,13 @@
                                      run_pipeline_chat_test)


-def getModelList(tp_num):
-    return [
-        item for item in get_torch_model_list(tp_num)
-        if 'falcon' not in item.lower() and 'chatglm2' not in item.lower()
-    ]
-
-
 @pytest.mark.order(6)
 @pytest.mark.usefixtures('common_case_config')
 @pytest.mark.pipeline_chat_pytorch
 @pytest.mark.gpu_num_1
 @pytest.mark.flaky(reruns=0)
-@pytest.mark.parametrize('model', getModelList(tp_num=1))
+@pytest.mark.parametrize('model',
+                         get_torch_model_list(tp_num=1, exclude_dup=True))
 def test_pipeline_chat_pytorch_tp1(config, common_case_config, model,
                                    worker_id):
     if 'gw' in worker_id:
@@ -39,7 +33,8 @@ def test_pipeline_chat_pytorch_tp1(config, common_case_config, model,
 @pytest.mark.pipeline_chat_pytorch
 @pytest.mark.gpu_num_2
 @pytest.mark.flaky(reruns=0)
-@pytest.mark.parametrize('model', getModelList(tp_num=2))
+@pytest.mark.parametrize('model',
+                         get_torch_model_list(tp_num=2, exclude_dup=True))
 def test_pipeline_chat_pytorch_tp2(config, common_case_config, model,
                                    worker_id):
     if 'gw' in worker_id:
@@ -57,6 +52,106 @@ def test_pipeline_chat_pytorch_tp2(config, common_case_config, model,
                              worker_id)


+@pytest.mark.order(6)
+@pytest.mark.usefixtures('common_case_config')
+@pytest.mark.pipeline_chat
+@pytest.mark.gpu_num_1
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('model',
+                         get_torch_model_list(tp_num=1,
+                                              quant_policy=4,
+                                              exclude_dup=True))
+def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
+                                  worker_id):
+    if 'Qwen2' in model:
+        return  # kvint4 for qwen2 is not support
+    if 'gw' in worker_id:
+        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
+    p = Process(target=run_pipeline_chat_test,
+                args=(config, common_case_config, model, 'pytorch-kvint',
+                      worker_id, {
+                          'quant_policy': 4
+                      }))
+    p.start()
+    p.join()
+    assert_pipeline_chat_log(config, common_case_config, model,
+                             'pytorch-kvint', worker_id)
+
+
+@pytest.mark.order(6)
+@pytest.mark.usefixtures('common_case_config')
+@pytest.mark.pipeline_chat
+@pytest.mark.gpu_num_2
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('model',
+                         get_torch_model_list(tp_num=2,
+                                              quant_policy=4,
+                                              exclude_dup=True))
+def test_pipeline_chat_kvint4_tp2(config, common_case_config, model,
+                                  worker_id):
+    if 'gw' in worker_id:
+        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
+                                                                     tp_num=2)
+    p = Process(target=run_pipeline_chat_test,
+                args=(config, common_case_config, model, 'pytorch-kvint',
+                      worker_id, {
+                          'quant_policy': 4
+                      }))
+    p.start()
+    p.join()
+    assert_pipeline_chat_log(config, common_case_config, model,
+                             'pytorch-kvint', worker_id)
+
+
+@pytest.mark.order(6)
+@pytest.mark.usefixtures('common_case_config')
+@pytest.mark.pipeline_chat
+@pytest.mark.gpu_num_1
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('model',
+                         get_torch_model_list(tp_num=1,
+                                              quant_policy=8,
+                                              exclude_dup=True))
+def test_pipeline_chat_kvint8_tp1(config, common_case_config, model,
+                                  worker_id):
+    if 'gw' in worker_id:
+        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
+    p = Process(target=run_pipeline_chat_test,
+                args=(config, common_case_config, model, 'pytorch-kvint',
+                      worker_id, {
+                          'quant_policy': 8
+                      }))
+    p.start()
+    p.join()
+    assert_pipeline_chat_log(config, common_case_config, model,
+                             'pytorch-kvint', worker_id)
+
+
+@pytest.mark.order(6)
+@pytest.mark.usefixtures('common_case_config')
+@pytest.mark.pipeline_chat
+@pytest.mark.gpu_num_2
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('model',
+                         get_torch_model_list(tp_num=2,
+                                              quant_policy=8,
+                                              exclude_dup=True))
+def test_pipeline_chat_kvint8_tp2(config, common_case_config, model,
+                                  worker_id):
+    if 'gw' in worker_id:
+        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
+                                                                     tp_num=2)
+    p = Process(target=run_pipeline_chat_test,
+                args=(config, common_case_config, model, 'pytorch-kvint',
+                      worker_id, {
+                          'quant_policy': 8
+                      }))
+    p.start()
+    p.join()
+    assert_pipeline_chat_log(config, common_case_config, model,
+                             'pytorch-kvint', worker_id)
+
+
 @pytest.mark.order(6)
 @pytest.mark.usefixtures('common_case_config')
 @pytest.mark.pipeline_chat_pytorch
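Each kvint test forwards an extra dict ({'quant_policy': 4} or {'quant_policy': 8}) to run_pipeline_chat_test inside a subprocess, so every parametrized case gets a fresh CUDA context. A hedged sketch of the receiving side, assuming the extra kwargs are copied onto the PyTorch backend config; the real implementation lives in the autotest utils and may differ:

# Hypothetical receiver-side sketch; not the actual run_pipeline_chat_test.
from lmdeploy import PytorchEngineConfig, pipeline

def build_pytorch_pipeline(model_path, tp_num=1, extra=None):
    backend_config = PytorchEngineConfig(tp=tp_num)
    for key, value in (extra or {}).items():
        # e.g. quant_policy=4 -> kv int4 cache, quant_policy=8 -> kv int8
        setattr(backend_config, key, value)
    return pipeline(model_path, backend_config=backend_config)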
