Skip to content

Commit 41fa9ce

Browse files
committed
fix for ROCm changes
1 parent 1201ea6 commit 41fa9ce

File tree

3 files changed

+35
-41
lines changed

3 files changed

+35
-41
lines changed

.github/scripts/generate_vllm_benchmark_matrix.py

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
2: [
2323
"linux.aws.h100.4",
2424
"linux.rocm.gpu.mi300.2",
25+
"intel-cpu-emr",
2526
],
2627
4: [
2728
"linux.aws.h100.4",
@@ -31,9 +32,6 @@
3132
"linux.aws.h100.8",
3233
"linux.rocm.gpu.mi300.8",
3334
],
34-
2: [
35-
"intel-cpu-emr",
36-
],
3735
}
3836

3937
# All the different names vLLM uses to refer to their benchmark configs
@@ -79,17 +77,10 @@ def parse_args() -> Any:
7977
help="the comma-separated list of models to benchmark",
8078
)
8179
parser.add_argument(
82-
"--gpus",
80+
"--platforms",
8381
type=str,
8482
default="",
85-
help="the comma-separated list of GPUs to benchmark",
86-
)
87-
parser.add_argument(
88-
"--arch",
89-
type=str,
90-
default="",
91-
action=ValidateDir,
92-
help="architect for the runner",
83+
help="the comma-separated list of platforms to benchmark",
9384
required=True,
9485
)
9586

@@ -118,19 +109,19 @@ def set_output(name: str, val: Any) -> None:
118109

119110

120111
def generate_benchmark_matrix(
121-
benchmark_configs_dir: str, models: List[str], gpus: List[str], arch: str
112+
benchmark_configs_dir: str, models: List[str], platforms: List[str]
122113
) -> Dict[str, Any]:
123114
"""
124115
Parse all the JSON files in vLLM benchmark configs directory to get the
125-
model name and tensor parallel size (aka number of GPUs)
116+
model name and tensor parallel size (aka number of GPUs or CPU NUMA nodes)
126117
"""
127118
get_all_models = True if not models else False
128-
use_all_gpus = True if not gpus else False
119+
use_all_platforms = True if not platforms else False
129120

130121
benchmark_matrix: Dict[str, Any] = {
131122
"include": [],
132123
}
133-
for file in glob.glob(f"{benchmark_configs_dir}/*{arch}.json"):
124+
for file in glob.glob(f"{benchmark_configs_dir}/*.json"):
134125
with open(file) as f:
135126
try:
136127
configs = json.load(f)
@@ -164,12 +155,12 @@ def generate_benchmark_matrix(
164155

165156
for runner in RUNNERS_MAPPING[tp]:
166157
found_runner = False
167-
for gpu in gpus:
168-
if gpu.lower() in runner:
158+
for platform in platforms:
159+
if platform.lower() in runner:
169160
found_runner = True
170161
break
171162

172-
if found_runner or use_all_gpus:
163+
if found_runner or use_all_platforms:
173164
benchmark_matrix["include"].append(
174165
{
175166
"runner": runner,
@@ -185,12 +176,11 @@ def generate_benchmark_matrix(
185176
def main() -> None:
186177
args = parse_args()
187178
models = [m.strip().lower() for m in args.models.split(",") if m.strip()]
188-
gpus = [m.strip().lower() for m in args.gpus.split(",") if m.strip()]
179+
platforms = [m.strip().lower() for m in args.platforms.split(",") if m.strip()]
189180
benchmark_matrix = generate_benchmark_matrix(
190181
args.benchmark_configs_dir,
191182
models,
192-
gpus,
193-
args.arch,
183+
platforms,
194184
)
195185
set_output("benchmark_matrix", benchmark_matrix)
196186

.github/scripts/setup_vllm_benchmark.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,24 +62,23 @@ def parse_args() -> Any:
6262
required=True,
6363
)
6464
parser.add_argument(
65-
"--arch",
65+
"--device",
6666
type=str,
6767
default="",
68-
action=ValidateDir,
69-
help="architect for the runner",
68+
help="device for the runner",
7069
required=True,
7170
)
7271

7372
return parser.parse_args()
7473

7574

7675
def setup_benchmark_configs(
77-
from_benchmark_configs_dir: str, to_benchmark_configs_dir: str, models: List[str], arch: str
76+
from_benchmark_configs_dir: str, to_benchmark_configs_dir: str, models: List[str], device: str
7877
) -> None:
7978
"""
8079
Setup the benchmark configs to run on this runner
8180
"""
82-
for file in glob.glob(f"{from_benchmark_configs_dir}/*{arch}.json"):
81+
for file in glob.glob(f"{from_benchmark_configs_dir}/*{device}.json"):
8382
filename = os.path.basename(file)
8483
benchmark_configs = []
8584

@@ -116,7 +115,7 @@ def main() -> None:
116115
args.from_benchmark_configs_dir,
117116
args.to_benchmark_configs_dir,
118117
args.models.split(","),
119-
args.arch,
118+
args.device,
120119
)
121120

122121

.github/workflows/vllm-benchmark.yml

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ on:
2020
A comma-separated list of models to benchmark, leave empty to run everything
2121
required: false
2222
type: string
23-
gpus:
23+
platforms:
2424
description: |
25-
A comma-separated list of GPUs to benchmark, i.e. h100, mi300
25+
A comma-separated list of platforms to benchmark, e.g. h100, mi300, emr
2626
required: true
2727
type: string
2828
default: h100,mi300
@@ -53,17 +53,15 @@ jobs:
5353
shell: bash
5454
env:
5555
MODELS: ${{ inputs.models || '' }}
56-
GPUS: ${{ inputs.gpus || '' }}
57-
ARCH: ${{ inputs.arch || '' }}
56+
PLATFORMS: ${{ inputs.platforms || '' }}
5857
run: |
5958
set -eux
6059
6160
# The generated matrix is grouped by model and runner
6261
python .github/scripts/generate_vllm_benchmark_matrix.py \
6362
--benchmark-configs-dir vllm-benchmarks/benchmarks \
6463
--models "${MODELS}" \
65-
--gpus "${GPUS}"
66-
--arch "${ARCH}"
64+
--platforms "${PLATFORMS}"
6765
6866
benchmarks:
6967
name: Run vLLM benchmarks
@@ -105,8 +103,9 @@ jobs:
105103
DEVICE_NAME=rocm
106104
rocm-smi
107105
else
108-
echo "Only CUDA and ROCm benchmarks are supported at the moment"
109-
exit 1
106+
echo "No accelerators. Use CPU instead"
107+
DEVICE_NAME=cpu
108+
lscpu
110109
fi
111110
echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
112111
@@ -120,6 +119,8 @@ jobs:
120119
DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
121120
elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
122121
DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
122+
elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
123+
DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
123124
fi
124125
echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV
125126
@@ -128,11 +129,11 @@ jobs:
128129
run: |
129130
set -eux
130131
131-
if [[ "${DEVICE_NAME}" == "cuda" ]]; then
132-
pip install -r .github/scripts/requirements.txt
133-
elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
132+
if [[ "${DEVICE_NAME}" == "rocm" ]]; then
134133
pip install -r .github/scripts/requirements.txt \
135134
--extra-index-url https://download.pytorch.org/whl/rocm6.3
135+
else
136+
pip install -r .github/scripts/requirements.txt
136137
fi
137138
138139
- name: Set Docker registry
@@ -142,6 +143,8 @@ jobs:
142143
DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
143144
elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
144145
DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
146+
elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
147+
DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo
145148
fi
146149
echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
147150
@@ -213,7 +216,7 @@ jobs:
213216
--from-benchmark-configs-dir vllm-benchmarks/benchmarks \
214217
--to-benchmark-configs-dir vllm-benchmarks/vllm/.buildkite/nightly-benchmarks/tests \
215218
--models "${MODELS}" \
216-
--arch "${ARCH}"
219+
--device "${DEVICE_NAME// /_}"
217220
218221
pushd vllm-benchmarks/vllm
219222
ls -lah .buildkite/nightly-benchmarks/tests
@@ -229,11 +232,13 @@ jobs:
229232
# vLLM-related environment variables
230233
ENGINE_VERSION: v1
231234
SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
232-
ARCH: ${{ inputs.arch || '' }}
235+
ARCH: ${{ env.DEVICE_NAME }}
233236
run: |
234237
set -x
235238
if [[ "$ARCH" == "cpu" ]]; then
236239
on_cpu=1
240+
else
241+
on_cpu=0
237242
fi
238243
docker run \
239244
${GPU_FLAG:-} \

0 commit comments

Comments
 (0)