daily_ete_test #388
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Daily end-to-end regression workflow for lmdeploy.
# Triggers: manual dispatch (with the inputs below) and a cron schedule.
name: daily_ete_test

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository in owner/name form. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # The list-valued inputs below are parsed with fromJSON() by the jobs,
      # so their defaults are written as strict JSON (double-quoted strings).
      backend:
        required: true
        description: 'Set backend testcase filter as a JSON list of turbomind, pytorch, turbomind_vl. Default is ["turbomind", "pytorch", "turbomind_vl"]'
        type: string
        default: '["turbomind", "pytorch", "turbomind_vl"]'
      model:
        required: true
        description: 'Set testcase module filter: chat, restful, pipeline, quantization. Default contains all models'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: '["quant", "tools", "restful", "pipeline", "benchmark", "evaluation"]'
  schedule:
    # Minute 0 of hour 16, on days-of-week 0 through 4.
    - cron: '00 16 * * 0-4'

# Workflow-wide environment, visible to every job and step.
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  # Wheel output folder, unique per run.
  OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  # Per-run directory that collects allure results, pytest cache and coverage data.
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  # Extra pytest flags for the benchmark job: re-attempts of a scheduled run
  # get '--lf --lfnf none', everything else gets '--lf'.
  FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}
  # Staging directory the test jobs copy the code and wheel from.
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL

jobs:
# Builds the lmdeploy wheel with builder/manywheel/build_wheel.sh and uploads
# it as a one-day artifact. Runs for scheduled triggers, or for manual runs
# that did not request offline mode.
linux-build:
  runs-on: ubuntu-latest
  if: ${{ !cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode) }}
  strategy:
    matrix:
      pyver:
        - py310
  env:
    DOCKER_TAG: cuda11.8
    PLAT_NAME: manylinux2014_x86_64
    PYTHON_VERSION: ${{ matrix.pyver }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        # Fall back to the upstream repository and main branch when the
        # dispatch inputs are absent.
        ref: ${{ github.event.inputs.repo_ref || 'main' }}
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}

    - name: Build
      run: |
        echo ${PYTHON_VERSION}
        echo ${PLAT_NAME}
        echo ${DOCKER_TAG}
        echo ${OUTPUT_FOLDER}
        echo ${GITHUB_RUN_ID}
        # remove -it
        sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
        bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}

    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        # The artifact name is what download_pkgs looks up; keep both in sync.
        name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
        path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
        # Fail the step when the build produced no files.
        if-no-files-found: error
        retention-days: 1
# Stages the code under test and its wheel into TEST_CODE_PATH on the
# self-hosted runner. Online runs take both from a fresh checkout plus the
# linux-build artifact; offline runs copy both from OFFLINE_CODE_PATH.
download_pkgs:
  needs: linux-build
  # Runs even when linux-build was skipped (offline mode).
  if: ${{ !cancelled() }}
  runs-on: [self-hosted, linux-a100]
  timeout-minutes: 50
  container:
    image: openmmlab/lmdeploy:latest-cu11
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Clone repository
      # Same major version as the other checkout steps in this workflow.
      uses: actions/checkout@v3
      if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{ github.event.inputs.repo_ref || 'main' }}

    - name: Copy repository
      if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
      # Replace the staging directory with the checked-out workspace.
      run: rm -rf ${{ env.TEST_CODE_PATH }} && mkdir ${{ env.TEST_CODE_PATH }} && cp -r . ${{ env.TEST_CODE_PATH }}

    - name: Copy repository - offline
      if: ${{ inputs.offline_mode }}
      # Replace the staging directory with the pre-provisioned offline code.
      run: rm -rf ${{ env.TEST_CODE_PATH }} && mkdir ${{ env.TEST_CODE_PATH }} && cp -r ${{ env.OFFLINE_CODE_PATH }}/. ${{ env.TEST_CODE_PATH }}

    - name: Download Artifacts
      if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
      uses: actions/download-artifact@v4
      with:
        # Must match the artifact name uploaded by linux-build for py310.
        name: my-artifact-${{ github.run_id }}-py310

    - name: Copy Artifacts
      if: ${{ github.event_name == 'schedule' || !inputs.offline_mode }}
      # Drop any stale wheel in the staging directory before copying the new one.
      run: rm ${{ env.TEST_CODE_PATH }}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{ env.TEST_CODE_PATH }}

    - name: Copy Artifacts - offline
      if: ${{ inputs.offline_mode }}
      run: rm ${{ env.TEST_CODE_PATH }}/lmdeploy-*.whl -f && cp ${{ env.OFFLINE_CODE_PATH }}/lmdeploy-*.whl ${{ env.TEST_CODE_PATH }}
# Quantization (w4a16, w8a8) and convert test suites. Always runs on schedule;
# on manual dispatch only when 'quant' is listed in regression_func.
test_quantization:
  runs-on: [self-hosted, linux-a100]
  needs: download_pkgs
  timeout-minutes: 120
  if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant')) }}
  env:
    MODELSCOPE_CACHE: /root/modelscope_hub
    MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
    PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
  container:
    image: openmmlab/lmdeploy:latest-cu11
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/github-actions/modelscope_hub:/root/modelscope_hub
      - /nvme/github-actions/modelscope_modules:/root/modelscope_modules
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/shared:/mnt/shared
      - /nvme/qa_test_models/lmdeploy/autotest:/local_case
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged code and wheel into the job workspace.
    - name: Copy repository and Artifacts
      run: cp -r ${{ env.TEST_CODE_PATH }}/. .

    - name: Install lmdeploy - dependency
      run: |
        # flash-attn is installed by hand from a local wheel;
        # release wheels are published at https://github.com/Dao-AILab/flash-attention/releases
        python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
        python3 -m pip install -e /root/packages/AutoAWQ_kernels
        python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install -r ${{ env.OFFLINE_REQUIREMENTS }}

    - name: Install lmdeploy
      run: |
        python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{ env.DEEPSEEK_VL }} --no-deps

    - name: Check env
      run: |
        pip install transformers
        pip uninstall -y nvidia-nccl-cu11
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # clear temporary logs left by earlier test cases
        rm -rf /nvme/qa_test_models/autotest_model/log/*
        # keep the pytest cache in the per-run report directory, linked into autotest
        mkdir ${{ env.REPORT_DIR }}/.pytest_cache -p
        ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest

    # Each suite below tolerates pytest failures (`|| true`) and then moves its
    # coverage data file into the report directory under a timestamped name.
    - name: Test lmdeploy - quantization w4a16
      if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
      continue-on-error: true
      run: |
        pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test' -n 8 --alluredir=${{ env.REPORT_DIR }} --clean-alluredir ${{ env.COV_PARAM }} || true
        mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')

    - name: Test lmdeploy - quantization w8a8
      if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
      continue-on-error: true
      run: |
        pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
        mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')

    - name: Test lmdeploy - convert
      if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
      continue-on-error: true
      run: |
        pytest autotest/tools/convert -m 'not pr_test' -n 8 --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
        mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')

    # Open up the report directory, then recreate the workspace directory
    # empty and world-writable.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Tool-level suites (chat, pipeline, restful, local regression cases), fanned
# out over a backend x model matrix. Always runs on schedule; on manual
# dispatch only when 'tools' is listed in regression_func.
test_tools:
  needs: test_quantization
  if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools')) }}
  runs-on: [self-hosted, linux-a100]
  timeout-minutes: 150
  strategy:
    fail-fast: false
    matrix:
      # The fallback is applied to the raw input *before* parsing, so an empty
      # input (for example on a schedule trigger, where no dispatch inputs
      # exist) never reaches fromJSON().
      backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch", "turbomind_vl"]') }}
      model: ${{ fromJSON(inputs.model || '["pipeline","restful","chat"]') }}
      exclude:
        - backend: turbomind_vl
          model: chat
      include:
        # Extra combination that drives the "local testcase" step.
        - backend: turbomind
          model: local_case
  env:
    PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
    MODELSCOPE_CACHE: /root/modelscope_hub
    MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
  container:
    image: openmmlab/lmdeploy:latest-cu11
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/github-actions/modelscope_hub:/root/modelscope_hub
      - /nvme/github-actions/modelscope_modules:/root/modelscope_modules
      - /nvme/github-actions/resources/lora:/root/lora
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/shared:/mnt/shared
      - /nvme/qa_test_models/lmdeploy/autotest:/local_case
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged code and wheel into the job workspace.
    - name: Copy repository and Artifacts
      run: cp -r ${{env.TEST_CODE_PATH}}/. .
    - name: Install lmdeploy - dependency
      run: |
        # manually install flash attn
        # the wheel comes from https://github.com/Dao-AILab/flash-attention/releases
        python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
        python3 -m pip install -e /root/packages/AutoAWQ_kernels
        python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
    - name: Install lmdeploy
      run: |
        python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{env.DEEPSEEK_VL}} --no-deps
    - name: Check env
      run: |
        pip uninstall -y nvidia-nccl-cu11
        python3 -m pip list
        lmdeploy check_env
        cp -r /root/lora .
        rm -rf allure-results
        # remove tmp log in testcase
        rm -rf /nvme/qa_test_models/autotest_model/log/*
        # keep the pytest cache in the per-run report directory, linked into autotest
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # Each suite runs its gpu_num_1 cases with 8 workers and its gpu_num_2
    # cases with 4. Pytest failures are tolerated (`|| true`) and the coverage
    # data file is moved into the report directory after every pytest run.
    - name: Test lmdeploy - chat workspace
      continue-on-error: true
      if: matrix.backend == 'turbomind' && matrix.model == 'chat'
      run: |
        pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test lmdeploy - chat
      continue-on-error: true
      if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'chat'
      run: |
        pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test lmdeploy - pipeline
      continue-on-error: true
      if: matrix.model == 'pipeline'
      run: |
        pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test lmdeploy - restful
      continue-on-error: true
      if: matrix.model == 'restful'
      run: |
        pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test lmdeploy - restful workspace
      continue-on-error: true
      if: matrix.backend == 'turbomind' && matrix.model == 'restful'
      run: |
        pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test lmdeploy - local testcase
      if: matrix.backend == 'turbomind' && matrix.model == 'local_case'
      run: |
        pytest /local_case/issue_regression --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}}|| true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    # Open up the report directory, then recreate the workspace directory
    # empty and world-writable.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# RESTful interface suites run against a live api_server, once per backend:
# first with the chat model, then with the base model. Always runs on
# schedule; on manual dispatch only when 'restful' is listed in regression_func.
test_restful:
  if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful')) }}
  runs-on: [self-hosted, linux-a100]
  needs: test_quantization
  strategy:
    fail-fast: false
    matrix:
      backend: ['turbomind', 'pytorch']
  # Must be larger than the sum of the step-level timeouts below (75 + 40
  # minutes) plus server warm-up and installation; otherwise the job is
  # cancelled before the step limits can take effect.
  timeout-minutes: 150
  container:
    image: openmmlab/lmdeploy:latest-cu11
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged code and wheel into the job workspace.
    - name: Copy repository and Artifacts
      run: cp -r ${{env.TEST_CODE_PATH}}/. .
    - name: Install lmdeploy - dependency
      run: |
        # manually install flash attn
        # the wheel comes from https://github.com/Dao-AILab/flash-attention/releases
        python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
        python3 -m pip install -e /root/packages/AutoAWQ_kernels
        python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
    - name: Install lmdeploy
      run: |
        python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{env.DEEPSEEK_VL}} --no-deps
    - name: Check env
      run: |
        pip uninstall -y nvidia-nccl-cu11
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        rm -rf /nvme/qa_test_models/autotest_model/log/*
        # keep the pytest cache in the per-run report directory, linked into autotest
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # Phase 1: chat model. The server is started in the background, its PID is
    # exported through GITHUB_ENV for the kill step, and a fixed sleep gives
    # it time to come up.
    - name: Start restful api turbomind
      if: matrix.backend == 'turbomind'
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b-chat --tp 2 > restful.log 2>&1 &
        echo "restful_pid=$!" >> "$GITHUB_ENV"
        sleep 120s
    - name: Start restful api pytorch
      if: matrix.backend == 'pytorch'
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b-chat --tp 2 --backend pytorch > restful.log 2>&1 &
        echo "restful_pid=$!" >> "$GITHUB_ENV"
        sleep 180s
    - name: Test lmdeploy - restful api
      timeout-minutes: 75
      run: |
        pytest autotest/interface/restful/test_restful_chat_func.py -n 20 -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Kill api server
      if: always()
      run: |
        kill -15 "$restful_pid"
    # Phase 2: base model, same start / test / kill sequence.
    - name: Start restful api turbomind - base
      if: matrix.backend == 'turbomind'
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b --tp 2 > restful.log 2>&1 &
        echo "restful_pid=$!" >> "$GITHUB_ENV"
        sleep 120s
    - name: Start restful api pytorch - base
      if: matrix.backend == 'pytorch'
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b --tp 2 --backend pytorch > restful.log 2>&1 &
        echo "restful_pid=$!" >> "$GITHUB_ENV"
        sleep 180s
    - name: Test lmdeploy - restful api - base
      timeout-minutes: 40
      run: |
        pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Kill api server
      if: always()
      run: |
        kill -15 "$restful_pid"
    # Open up the report directory, then recreate the workspace directory
    # empty and world-writable.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Pipeline interface suites (functional and long-text cases). Always runs on
# schedule; on manual dispatch only when 'pipeline' is listed in regression_func.
test_pipeline:
  runs-on: [self-hosted, linux-a100]
  needs: test_quantization
  timeout-minutes: 120
  if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'pipeline')) }}
  container:
    image: openmmlab/lmdeploy:latest-cu11
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged code and wheel into the job workspace.
    - name: Copy repository and Artifacts
      run: cp -r ${{ env.TEST_CODE_PATH }}/. .

    - name: Install lmdeploy - dependency
      run: |
        # flash-attn is installed by hand from a local wheel;
        # release wheels are published at https://github.com/Dao-AILab/flash-attention/releases
        python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
        python3 -m pip install -e /root/packages/AutoAWQ_kernels
        python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install -r ${{ env.OFFLINE_REQUIREMENTS }}

    - name: Install lmdeploy
      run: |
        python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{ env.DEEPSEEK_VL }} --no-deps

    - name: Check env
      run: |
        pip uninstall -y nvidia-nccl-cu11
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # clear temporary logs left by earlier test cases
        rm -rf /nvme/qa_test_models/autotest_model/log/*
        # keep the pytest cache in the per-run report directory, linked into autotest
        mkdir ${{ env.REPORT_DIR }}/.pytest_cache -p
        ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest

    # Four pytest runs: the functional suite, then the long-text suite split by
    # GPU-count marker (1, 2, 4) with worker counts 8, 4 and 2. Coverage data
    # is moved into the report directory after each run.
    - name: Test lmdeploy - interface pipeline case
      run: |
        pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
        mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
        mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
        mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
        mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')

    # Open up the report directory, then recreate the workspace directory
    # empty and world-writable.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Benchmark script suite. Always runs on schedule; on manual dispatch only
# when 'benchmark' is listed in regression_func.
test_benchmark:
  runs-on: [self-hosted, linux-a100]
  needs: test_quantization
  timeout-minutes: 120
  if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark')) }}
  container:
    image: openmmlab/lmdeploy:latest-cu11
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged code and wheel into the job workspace.
    - name: Copy repository and Artifacts
      run: cp -r ${{ env.TEST_CODE_PATH }}/. .

    - name: Install lmdeploy - dependency
      run: |
        # flash-attn is installed by hand from a local wheel;
        # release wheels are published at https://github.com/Dao-AILab/flash-attention/releases
        python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
        python3 -m pip install -e /root/packages/AutoAWQ_kernels
        python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install -r ${{ env.OFFLINE_REQUIREMENTS }}

    - name: Install lmdeploy
      run: |
        python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{ env.DEEPSEEK_VL }} --no-deps

    - name: Check env
      run: |
        pip uninstall -y nvidia-nccl-cu11
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # clear temporary logs left by earlier test cases
        rm -rf /nvme/qa_test_models/autotest_model/log/*
        # keep the pytest cache in the per-run report directory, linked into autotest
        mkdir ${{ env.REPORT_DIR }}/.pytest_cache -p
        ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest

    # FAIL_CONFIG supplies the extra pytest flags chosen at workflow level;
    # the run id is handed to the suite through --run_id.
    - name: Test benchmark script
      run: |
        pytest autotest/benchmark -n 4 --run_id ${{ github.run_id }} -m function ${{ env.FAIL_CONFIG }} --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
        mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')

    # Open up the report and benchmark-report directories, then recreate the
    # workspace directory empty and world-writable.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        chmod -R 777 /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Model evaluation through opencompass, split into 'chat' and 'base' matrix
# legs. Always runs on schedule; on manual dispatch only when 'evaluation' is
# listed in regression_func.
test_evaluation:
  runs-on: [self-hosted, linux-a100]
  needs: test_quantization
  timeout-minutes: 120  # 2 hours
  if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'evaluation')) }}
  strategy:
    fail-fast: false
    matrix:
      evaluate_type:
        - 'chat'
        - 'base'
  container:
    image: openmmlab/lmdeploy:latest-cu11
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/github-actions/resources:/root/resources
      - /nvme/github-actions/opencompass-data:/root/opencompass-data
      - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/shared:/mnt/shared
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged code and wheel into the job workspace.
    - name: Copy repository and Artifacts
      run: cp -r ${{ env.TEST_CODE_PATH }}/. .

    - name: Install lmdeploy - dependency
      run: |
        # flash-attn is installed by hand from a local wheel;
        # release wheels are published at https://github.com/Dao-AILab/flash-attention/releases
        python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
        python3 -m pip install -e /root/packages/AutoAWQ_kernels
        python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install -r ${{ env.OFFLINE_REQUIREMENTS }}

    - name: Install lmdeploy
      run: |
        python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{ env.DEEPSEEK_VL }} --no-deps

    # Shallow-clone opencompass, install it in editable mode and export its
    # location to later steps as OPENCOMPASS_DIR.
    - name: Install opencompass
      run: |
        git clone --depth=1 https://github.com/open-compass/opencompass.git
        cd opencompass
        python3 -m pip install -e .
        echo "OPENCOMPASS_DIR=$(pwd)" >> $GITHUB_ENV

    - name: Check env
      run: |
        pip uninstall -y nvidia-nccl-cu11
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # clear temporary logs left by earlier test cases
        rm -rf /nvme/qa_test_models/autotest_model/log/*
        # keep the pytest cache in the per-run report directory, linked into autotest
        mkdir ${{ env.REPORT_DIR }}/.pytest_cache -p
        ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest

    # Expose the mounted dataset directory as ./data and let the helper script
    # link the models from /nvme/qa_test_models into the workspace.
    - name: Setup paths for evaluation
      run: |
        ln -s /root/opencompass-data ./data
        python3 .github/scripts/action_tools.py create_model_links /nvme/qa_test_models .

    # Chat leg: race and gsm8k datasets, results under the per-run report directory.
    - name: Evaluate models
      if: matrix.evaluate_type == 'chat'
      run: |
        export LMDEPLOY_DIR=$(pwd)
        python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_4bits, turbomind_internlm2_5_7b_chat_kvint4, pt_internlm2_5_7b_chat, turbomind_internlm2_5_20b_chat, turbomind_internlm2_5_20b_chat_4bits, turbomind_internlm2_5_20b_chat_kvint4, pt_internlm2_5_20b_chat, turbomind_llama_3d1_8b_instruct, pt_llama_3d1_8b_instruct, turbomind_llama_3d1_8b_instruct_4bits, turbomind_llama_3d1_8b_instruct_kvint4, turbomind_qwen2_7b_instruct, turbomind_qwen2_7b_instruct_4bits, pt_qwen1_5_moe_2_7b_chat, pt_gemma_2_9b_it]" "[*race_datasets, *gsm8k_datasets]" /root/evaluation-reports/${{ github.run_id }} chat true

    # Base leg: mmlu and gsm8k datasets.
    - name: Evaluate base models
      if: matrix.evaluate_type == 'base'
      run: |
        export LMDEPLOY_DIR=$(pwd)
        python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_5_7b, turbomind_qwen2_7b, turbomind_internlm2_5_7b_batch1]" "[*mmlu_datasets, *gsm8k_datasets]" /root/evaluation-reports/${{ github.run_id }} base true

    # Recreate the workspace directory empty and world-writable.
    - name: Clear workspace
      if: always()
      run: |
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Summarises the benchmark reports written by test_benchmark for this run.
# Runs directly on the self-hosted runner (no container is configured).
get_benchmark_result:
  runs-on: [self-hosted, linux-a100]
  needs:
    - test_benchmark
  timeout-minutes: 5
  if: ${{ !cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark')) }}
  env:
    BENCHMARK_REPORT_DIR: /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
  steps:
    - name: Clone repository
      uses: actions/checkout@v3
      with:
        ref: ${{ github.event.inputs.repo_ref || 'main' }}
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}

    # Install the helper script's Python dependencies, then build the report.
    - name: Get overview
      run: |
        pip install pandas fire mmengine
        python3 .github/scripts/action_tools.py generate_benchmark_report $BENCHMARK_REPORT_DIR
# Merges the per-suite coverage data files collected in REPORT_DIR into one
# report once the tools, restful, pipeline and benchmark jobs have finished.
get_coverage_report:
  runs-on: [self-hosted, linux-a100]
  needs:
    - test_tools
    - test_restful
    - test_pipeline
    - test_benchmark
  timeout-minutes: 5
  if: ${{ !cancelled() }}
  container:
    image: openmmlab/lmdeploy:latest-cu11
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged code and wheel into the job workspace.
    - name: Copy repository and Artifacts
      run: cp -r ${{ env.TEST_CODE_PATH }}/. .

    - name: Install lmdeploy
      run: |
        python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt

    # Combine the data files, write the XML and terminal reports, and store
    # the combined data file back in the report directory.
    - name: Get coverage report
      run: |
        pip install coverage
        coverage combine ${{ env.REPORT_DIR }}
        coverage xml -o ${{ env.REPORT_DIR }}/coverage.xml
        coverage report -m
        mv .coverage ${{ env.REPORT_DIR }}/.coverage

    # Open up the report directory, then recreate the workspace directory
    # empty and world-writable.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Posts a failure notice to a Feishu webhook. Only considered for runs on the
# develop or main ref, and the message is sent only when at least one of the
# needed jobs reported 'failure'.
notify_to_feishu:
  if: always() && !cancelled() && (github.ref_name == 'develop' || github.ref_name == 'main')
  needs: [get_benchmark_result, get_coverage_report, test_evaluation]
  timeout-minutes: 5
  runs-on: [self-hosted, linux-a100]
  steps:
    - name: notify
      if: contains(needs.*.result, 'failure')
      # Secrets and context values are handed to the script as environment
      # variables and expanded by the shell inside double quotes, instead of
      # being pasted into the script text, so their content cannot be
      # interpreted as shell syntax.
      env:
        FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }}
        FEISHU_USER_ID: ${{ secrets.FEISHU_USER_ID }}
        REF_NAME: ${{ github.ref_name }}
        WORKFLOW_NAME: ${{ github.workflow }}
      run: |
        curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test finished!!!","content":[[{"tag":"text","text":"branch: '"${REF_NAME}"', run action: '"${WORKFLOW_NAME}"' failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'"${GITHUB_REPOSITORY}"'/actions/runs/'"${GITHUB_RUN_ID}"'"},{"tag":"at","user_id":"'"${FEISHU_USER_ID}"'"}]]}}}}' "${FEISHU_WEBHOOK_URL}"