Skip to content

Commit 71022de

Browse files
committed
CI: update workflow docker
1 parent aa8a0bd commit 71022de

File tree

2 files changed

10 additions and 9 deletions

2 files changed

10 additions and 9 deletions

autotest/evaluate/eval_config_chat.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
 26  26   path=MODEL_PATH,
 27  27   meta_template=api_meta_template,
 28  28   max_out_len=2048,
     29 + batch_size=500,
 29  30   temperature=0.1,
 30  31   )
 31  32   ]

autotest/utils/evaluate_utils.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
  8   8   DEFAULT_PORT = 23333
  9   9
 10  10
 11     - def write_to_summary(model_name, tp_num, result, msg, worker_id, work_dir=None):
     11 + def write_to_summary(model_name, tp_num, result, msg, worker_id, backend_type, work_dir=None):
 12  12   status = '✅ PASS' if result else '❌ FAIL'
 13  13
 14  14   metrics = {}
@@ -39,7 +39,7 @@ def write_to_summary(model_name, tp_num, result, msg, worker_id, work_dir=None):
 39  39   mmlu_value = metrics.get('mmlu', '')
 40  40   gsm8k_value = metrics.get('gsm8k', '')
 41  41
 42     - summary_line = f'| {model_name} | TP{tp_num} | {status} | {mmlu_value} | {gsm8k_value} |\n'
     42 + summary_line = f'| {model_name} | {backend_type} | TP{tp_num} | {status} | {mmlu_value} | {gsm8k_value} |\n'
 43  43
 44  44   summary_file = os.environ.get('GITHUB_STEP_SUMMARY', None)
 45  45   if summary_file:
@@ -49,17 +49,17 @@ def write_to_summary(model_name, tp_num, result, msg, worker_id, work_dir=None):
 49  49   else:
 50  50   with open(summary_file, 'r') as f:
 51  51   first_lines = f.read(200)
 52     - if '| Model | TP | Status | mmlu | gsm8k |' not in first_lines:
     52 + if '| Model | Backend | TP | Status | mmlu | gsm8k |' not in first_lines:
 53  53   write_header = True
 54  54
 55  55   with open(summary_file, 'a') as f:
 56  56   if write_header:
 57  57   f.write('## Model Evaluation Results\n')
 58     - f.write('| Model | TP | Status | mmlu | gsm8k |\n')
 59     - f.write('|-------|----|--------|------|-------|\n')
     58 + f.write('| Model | Backend | TP | Status | mmlu | gsm8k |\n')
     59 + f.write('|-------|---------|----|--------|------|-------|\n')
 60  60   f.write(summary_line)
 61  61   else:
 62     - print(f'Summary: {model_name} | TP{tp_num} | {status} | {mmlu_value} | {gsm8k_value}')
     62 + print(f'Summary: {model_name} | {backend_type} | TP{tp_num} | {status} | {mmlu_value} | {gsm8k_value}')
 63  63
 64  64
 65  65   def restful_test(config, run_id, prepare_environment, worker_id='gw0', port=DEFAULT_PORT):
@@ -181,7 +181,7 @@ def restful_test(config, run_id, prepare_environment, worker_id='gw0', port=DEFA
181 181   if error_lines:
182 182   final_msg += f'\nLog errors: {" | ".join(error_lines[:3])}'
183 183
184     - write_to_summary(model_name, tp_num, final_result, final_msg, worker_id, work_dir)
    184 + write_to_summary(model_name, tp_num, final_result, final_msg, worker_id, backend_type, work_dir)
185 185
186 186   return final_result, final_msg
187 187
@@ -193,10 +193,10 @@ def restful_test(config, run_id, prepare_environment, worker_id='gw0', port=DEFA
193 193   timeout_msg = (f'Evaluation timed out for {model_name} '
194 194   f'after 7200 seconds')
195 195   if work_dir:
196     - write_to_summary(model_name, tp_num, False, timeout_msg, worker_id, work_dir)
    196 + write_to_summary(model_name, tp_num, False, timeout_msg, worker_id, backend_type, work_dir)
197 197   return False, timeout_msg
198 198   except Exception as e:
199 199   error_msg = f'Error during evaluation for {model_name}: {str(e)}'
200 200   if work_dir:
201     - write_to_summary(model_name, tp_num, False, error_msg, worker_id, work_dir)
    201 + write_to_summary(model_name, tp_num, False, error_msg, worker_id, backend_type, work_dir)
202 202   return False, error_msg

0 commit comments

Comments
 (0)