[CI] add API evaluation test #3987
@@ -0,0 +1,137 @@
name: api_eva

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Branch, tag, or commit id to test. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Backend testcase filter: a JSON list containing "turbomind", "pytorch", or both. Default is ["turbomind", "pytorch"]'
        type: string
        default: '["turbomind", "pytorch"]'

env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  FAIL_CONFIG: '--lf'
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL

jobs:
  linux-build:
    if: ${{ !cancelled() }}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.4
      OUTPUT_FOLDER: cuda12.4_dist_${{ github.run_id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it so the build script can run without a TTY in CI
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  test_evaluation:
    needs: linux-build
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, test-140]
    timeout-minutes: 2400
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]') }}
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/github-actions/opencompass-data:/root/opencompass-data
        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/shared:/mnt/shared
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
        - /mnt/187:/mnt/187
    steps:
      - name: Create and change to _wk directory
        run: |
          echo "Working directory set to: $(pwd)"
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r requirements_cuda.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Install opencompass
        run: |
          python3 -m pip install opencompass
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          mkdir -p ${{ env.REPORT_DIR }}/.pytest_cache
          ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest
      - name: Setup paths and run evaluation
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
        run: |
          overall_exit=0
          ln -s /mnt/187/opencompass-data/data ./data
          # run both suites even if the first fails, and exit non-zero if either failed
          pytest autotest/evaluate/test_api_evaluate_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
          pytest autotest/evaluate/test_api_evaluate_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
          exit $overall_exit
      - name: Clear workspace
        if: always()
        run: |
          export workdir=$(pwd)
          rm -rf $workdir/*
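
The `backend` input is a plain string that `fromJSON` expands into the job matrix, so it must hold a valid JSON list: one matrix job per backend name. A minimal sketch of that parsing contract, mirroring the workflow expression with Python's `json.loads` (the `expand_matrix` helper and the `gh` command in the comments are illustrative, not part of this PR):

import json

DEFAULT_BACKENDS = '["turbomind", "pytorch"]'

def expand_matrix(backend_input):
    """Mirror `fromJSON(inputs.backend || '["turbomind", "pytorch"]')`."""
    # fall back to the default when the input is empty, then parse the JSON list
    entries = json.loads(backend_input or DEFAULT_BACKENDS)
    assert all(e in ('turbomind', 'pytorch') for e in entries), 'unknown backend'
    return entries

print(expand_matrix(None))           # ['turbomind', 'pytorch'] -> two jobs
print(expand_matrix('["pytorch"]'))  # ['pytorch'] -> one job
# A manual dispatch would look like (illustrative):
#   gh workflow run api_eva --field backend='["pytorch"]'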
@@ -0,0 +1,40 @@
from mmengine.config import read_base
from opencompass.models import OpenAISDK

with read_base():
    from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets  # noqa: F401, E501
    from opencompass.configs.datasets.mmlu.mmlu_gen_4d595a import mmlu_datasets  # noqa: F401, E501

    from opencompass.configs.summarizers.groups.mmlu import mmlu_summary_groups  # noqa: F401, E501

# gather every imported *_datasets list into one flat list
datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])

MODEL_NAME = ''
MODEL_PATH = ''
API_BASE = ''

api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
])

models = [
    dict(
        type=OpenAISDK,
        abbr=f'{MODEL_NAME}-lmdeploy-api',
        openai_api_base=API_BASE,
        key='EMPTY',
        path=MODEL_PATH,
        meta_template=api_meta_template,
        max_out_len=2048,
        batch_size=500,
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, should we config the following options?
I am not so familiar with OC rules. Is there a guide about it? |
||
) | ||
] | ||
|
||
summarizer = dict(
    dataset_abbrs=[
        ['mmlu', 'naive_average'],
        ['gsm8k', 'accuracy'],
    ],
    summary_groups=sum([v for k, v in locals().items() if k.endswith('_summary_groups')], []),
)

Review thread on the sampling parameters above: "should we config the following options?" / "I am not so familiar with OC rules. Is there a guide about it?"
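
MODEL_NAME, MODEL_PATH, and API_BASE are committed as empty placeholders, so something has to fill them in before OpenCompass runs. A minimal sketch of that step, assuming plain string substitution over the config file (the fill_eval_config helper, file names, and model values below are hypothetical, not part of this PR):

from pathlib import Path

def fill_eval_config(template, out, model_name, model_path, api_base):
    # hypothetical helper: copy the committed template, replacing the
    # empty placeholders with concrete values for this run
    text = Path(template).read_text()
    text = text.replace("MODEL_NAME = ''", f"MODEL_NAME = '{model_name}'")
    text = text.replace("MODEL_PATH = ''", f"MODEL_PATH = '{model_path}'")
    text = text.replace("API_BASE = ''", f"API_BASE = '{api_base}'")
    Path(out).write_text(text)
    return out

# Example: point the config at an lmdeploy server on the default port.
fill_eval_config('eval_config.py', 'eval_config_filled.py',
                 model_name='internlm2-chat-7b',
                 model_path='/nvme/qa_test_models/internlm2-chat-7b',
                 api_base='http://127.0.0.1:23333/v1')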
@@ -0,0 +1,90 @@
import pytest
from utils.config_utils import get_evaluate_pytorch_model_list, get_workerid
from utils.evaluate_utils import restful_test
from utils.run_restful_chat import start_restful_api, stop_restful_api

DEFAULT_PORT = 23333


@pytest.fixture(scope='function', autouse=True)
def prepare_environment(request, config, worker_id):
    # start a restful api server for the parametrized model, hand the
    # param dict to the test, and tear the server down afterwards
    param = request.param
    model = param['model']
    backend = param['backend']
    model_path = config.get('model_path') + '/' + model
    pid, startRes = start_restful_api(config, param, model, model_path, backend, worker_id)
    yield param
    stop_restful_api(pid, startRes, param)


def getModelList(tp_num):
    # fetch the evaluation model list for this tp size and drop any
    # preconfigured cuda_prefix for these runs
    model_list = get_evaluate_pytorch_model_list(tp_num, kvint_list=[4, 8])
    new_model_list = []
    for model in model_list:
        model['cuda_prefix'] = None
        new_model_list.append(model)
    return new_model_list


@pytest.mark.gpu_num_1
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', getModelList(tp_num=1), indirect=True)
def test_restful_tp1(config, run_id, prepare_environment, worker_id):
    if get_workerid(worker_id) is None:
        result, msg = restful_test(config, run_id, prepare_environment, worker_id=worker_id)
    else:
        result, msg = restful_test(config,
                                   run_id,
                                   prepare_environment,
                                   worker_id=worker_id,
                                   port=DEFAULT_PORT + get_workerid(worker_id))

    assert result, msg


@pytest.mark.gpu_num_2
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', getModelList(tp_num=2), indirect=True)
def test_restful_tp2(config, run_id, prepare_environment, worker_id):
    if get_workerid(worker_id) is None:
        result, msg = restful_test(config, run_id, prepare_environment, worker_id=worker_id)
    else:
        result, msg = restful_test(config,
                                   run_id,
                                   prepare_environment,
                                   worker_id=worker_id,
                                   port=DEFAULT_PORT + get_workerid(worker_id))

    assert result, msg


@pytest.mark.gpu_num_4
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', getModelList(tp_num=4), indirect=True)
def test_restful_tp4(config, run_id, prepare_environment, worker_id):
    if get_workerid(worker_id) is None:
        result, msg = restful_test(config, run_id, prepare_environment, worker_id=worker_id)
    else:
        result, msg = restful_test(config,
                                   run_id,
                                   prepare_environment,
                                   worker_id=worker_id,
                                   port=DEFAULT_PORT + get_workerid(worker_id))

    assert result, msg


@pytest.mark.gpu_num_8
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', getModelList(tp_num=8), indirect=True)
def test_restful_tp8(config, run_id, prepare_environment, worker_id):
    if get_workerid(worker_id) is None:
        result, msg = restful_test(config, run_id, prepare_environment, worker_id=worker_id)
    else:
        result, msg = restful_test(config,
                                   run_id,
                                   prepare_environment,
                                   worker_id=worker_id,
                                   port=DEFAULT_PORT + get_workerid(worker_id))

    assert result, msg
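
Every test above offsets the server port by the xdist worker index (DEFAULT_PORT + get_workerid(worker_id)) so parallel workers do not collide. get_workerid is imported from utils.config_utils; a plausible sketch of its behavior, assuming the standard pytest-xdist naming scheme ('master' when not distributed, 'gw0', 'gw1', ... under -n):

def get_workerid(worker_id):
    # sketch (assumption): the real helper lives in utils/config_utils.py
    # 'master' means pytest is not running under -n, so use the default port
    if not worker_id or not worker_id.startswith('gw'):
        return None
    return int(worker_id[2:])

assert get_workerid('master') is None
assert get_workerid('gw3') == 3
# e.g. the gpu_num_1 suite under `-n 8` talks to ports 23333 + 0..7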