           A comma-separated list of models to benchmark, leave empty to run everything
         required: false
         type: string
+      gpus:
+        description: |
+          A comma-separated list of GPUs to benchmark, e.g. h100, mi300
+        required: true
+        type: string
+        default: h100,mi300
   pull_request:
     paths:
       - .github/workflows/vllm-benchmark.yml
@@ -47,13 +53,15 @@ jobs:
         shell: bash
         env:
           MODELS: ${{ inputs.models || '' }}
+          GPUS: ${{ inputs.gpus || '' }}
         run: |
           set -eux

           # The generated matrix is grouped by model and runner
           python .github/scripts/generate_vllm_benchmark_matrix.py \
             --benchmark-configs-dir vllm-benchmarks/benchmarks \
-            --models "${MODELS}"
+            --models "${MODELS}" \
+            --gpus "${GPUS}"
   benchmarks:
     name: Run vLLM benchmarks
@@ -63,6 +71,9 @@ jobs:
       fail-fast: false
     runs-on: ${{ matrix.runner }}
     environment: pytorch-x-vllm
+    permissions:
+      id-token: write
+      contents: read
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -80,23 +91,63 @@ jobs:
           python-version: '3.12'
           cache: 'pip'

-      - name: Set GPU device name
+      - name: Check if the device is supported
+        shell: bash
+        run: |
+          set -eux
+
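+          # Probe for the vendor management CLI to tell CUDA and ROCm runners apart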
+          if command -v nvidia-smi; then
+            DEVICE_NAME=cuda
+            nvidia-smi
+          elif command -v rocm-smi; then
+            DEVICE_NAME=rocm
+            rocm-smi
+          else
+            echo "Only CUDA and ROCm benchmarks are supported at the moment"
+            exit 1
+          fi
+          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
+
+      - name: Set GPU name and type
         working-directory: vllm-benchmarks
+        shell: bash
         run: |
-          export GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
-          echo "GPU_DEVICE=$GPU_DEVICE" >> $GITHUB_ENV
+          set -eux
+
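+          # nvidia-smi reports names like "NVIDIA H100 80GB HBM3", so awk keeps the second field;
+          # rocminfo's "Marketing Name" line carries the AMD device string, with xargs trimming whitespace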
+          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
+            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
+          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
+          fi
+          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV

       - name: Install dependencies
+        shell: bash
         run: |
           set -eux
-          pip install -r .github/scripts/requirements.txt
+
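+          # On ROCm, also pull wheels built against ROCm from the PyTorch wheel index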
+          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
+            pip install -r .github/scripts/requirements.txt
+          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            pip install -r .github/scripts/requirements.txt \
+              --extra-index-url https://download.pytorch.org/whl/rocm6.3
+          fi
+
+      - name: Set Docker registry
+        shell: bash
+        run: |
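+          # CUDA images come from vLLM's public ECR postmerge repo; ROCm images are published to Docker Hub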
+          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
+            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
+          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
+          fi
+          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV

       - name: Check for last benchmark commit
         working-directory: vllm-benchmarks
         env:
           HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
           HEAD_SHA: ${{ inputs.vllm_commit || '' }}
-          DOCKER_IMAGE_PREFIX: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
           MODELS: ${{ matrix.models }}
         run: |
           set -eux
@@ -117,7 +168,7 @@ jobs:
           fi

           NOT_EXIST=0
-          S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results_${MODELS//\//_}.json"
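+          # ${DEVICE_TYPE// /_} swaps any spaces in the device name for underscores to keep the S3 key clean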
+          S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
           aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1

           if [[ ${NOT_EXIST} == "1" ]]; then
@@ -130,10 +181,15 @@ jobs:

           echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV

-      - name: Setup GPU_FLAG for docker run
+      - name: Setup CUDA GPU_FLAG for docker run
+        if: env.DEVICE_NAME == 'cuda'
         run: |
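+          # --gpus all exposes every NVIDIA GPU to the container; ROCm runners are configured by the setup-rocm action below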
           echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

+      - name: Setup ROCm
+        if: env.DEVICE_NAME == 'rocm'
+        uses: pytorch/pytorch/./.github/actions/setup-rocm@main
+
       - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
         run: |
           echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
@@ -165,7 +221,7 @@ jobs:
           SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
           SCCACHE_REGION: us-east-1
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          DOCKER_IMAGE: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:${{ env.HEAD_SHA }}
+          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}
           # vLLM-related environment variables
           ENGINE_VERSION: v1
           SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
@@ -177,7 +233,8 @@ jobs:
             ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
             -e SCCACHE_BUCKET \
             -e SCCACHE_REGION \
-            -e GPU_DEVICE \
+            -e DEVICE_NAME \
+            -e DEVICE_TYPE \
             -e HF_TOKEN \
             -e ENGINE_VERSION \
             -e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
@@ -189,6 +246,16 @@ jobs:
             "${DOCKER_IMAGE}" \
             bash -xc "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"

+      - name: Authenticate with AWS
+        # CUDA runners on AWS already have access to the bucket via their runner IAM role
+        if: env.DEVICE_NAME != 'cuda'
+        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
+        with:
+          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
+          # The max duration enforced by the server side
+          role-duration-seconds: 18000
+          aws-region: us-east-1
+
       - name: Upload the benchmark results
         env:
           BENCHMARK_RESULTS: vllm-benchmarks/vllm/benchmarks/results
@@ -203,5 +270,5 @@ jobs:
             --repo vllm-benchmarks/vllm \
             --benchmark-name "vLLM benchmark" \
             --benchmark-results "${BENCHMARK_RESULTS}" \
-            --device "${GPU_DEVICE}" \
+            --device "${DEVICE_TYPE// /_}" \
             --model "${MODELS//\//_}"