vllm-project
diff --git a/‎tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py‎
Lines changed: 30 additions & 6 deletions b/‎tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py‎
Lines changed: 30 additions & 6 deletions
diff --git a/‎vllm_ascend/spec_decode/__init__.py‎
Lines changed: 8 additions & 1 deletion b/‎vllm_ascend/spec_decode/__init__.py‎
Lines changed: 8 additions & 1 deletion
@@ -1,11 +1,15 @@
 from __future__ import annotations
 
+import os
+
 import pytest
 from vllm import SamplingParams
 from vllm.config import CompilationConfig, CUDAGraphMode
 
 from tests.e2e.conftest import VllmRunner
 
+os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"  # set at module import, before any test body runs; presumably so vLLM workers start via "spawn" - TODO confirm
+
 
 @pytest.fixture
 def sampling_config():
@@ -17,12 +21,11 @@ def model_name():
     return "wemaster/deepseek_mtp_main_random_bf16"
 
 
-def mtp_correctness(
-    sampling_config: SamplingParams,
-    model_name: str,
-    num_speculative_tokens: int,
-    graph_mode: CUDAGraphMode = CUDAGraphMode.PIECEWISE,
-):
+def mtp_correctness(sampling_config: SamplingParams,
+                    model_name: str,
+                    num_speculative_tokens: int,
+                    graph_mode: CUDAGraphMode = CUDAGraphMode.PIECEWISE,
+                    disable_padded_drafter_batch=True):
     example_prompts = [
         "Hello, my name is",
         "The president of the United States is",
@@ -54,6 +57,7 @@ def mtp_correctness(
             speculative_config={
                 "method": "deepseek_mtp",
                 "num_speculative_tokens": num_speculative_tokens,
+                "disable_padded_drafter_batch": disable_padded_drafter_batch,
             },
             enforce_eager=False,
             max_model_len=2000,
@@ -110,3 +114,23 @@ def test_mtp2_correctness_full_graph(
     model_name: str,
 ):
     mtp_correctness(sampling_config, model_name, 2, CUDAGraphMode.FULL)
+
+
+def test_mtp1_correctness_piecewise_graph_with_pad(
+    sampling_config: SamplingParams,
+    model_name: str,
+):
+    mtp_correctness(sampling_config,
+                    model_name,
+                    1,  # num_speculative_tokens; graph_mode is omitted, so the helper's PIECEWISE default applies
+                    disable_padded_drafter_batch=False)  # helper default is True; False is forwarded into speculative_config
+
+
+def test_mtp2_correctness_piecewise_graph_with_pad(
+    sampling_config: SamplingParams,
+    model_name: str,
+):
+    mtp_correctness(sampling_config,
+                    model_name,
+                    2,  # num_speculative_tokens; graph_mode is omitted, so the helper's PIECEWISE default applies
+                    disable_padded_drafter_batch=False)  # helper default is True; False is forwarded into speculative_config
@@ -19,14 +19,21 @@
 from vllm_ascend.spec_decode.eagle_proposer import EagleProposer
 from vllm_ascend.spec_decode.mtp_proposer import MtpProposer
 from vllm_ascend.spec_decode.ngram_proposer import NgramProposer
+from vllm_ascend.torchair.torchair_mtp_proposer import TorchairMtpProposer
 
 
-def get_spec_decode_method(method, vllm_config, device, runner):
+def get_spec_decode_method(method,
+                           vllm_config,
+                           device,
+                           runner,
+                           is_torchair_graph=False):
     if method == "ngram":
         return NgramProposer(vllm_config, device, runner)
     elif method in ["eagle", "eagle3"]:
         return EagleProposer(vllm_config, device, runner)
     elif method == 'deepseek_mtp':
+        if is_torchair_graph:
+            return TorchairMtpProposer(vllm_config, device, runner)
         return MtpProposer(vllm_config, device, runner)
     else:
         raise ValueError("Unknown speculative decoding method: "