 from transformers.configuration_utils import PretrainedConfig
 from vllm.config import ModelConfig, VllmConfig
 from vllm.model_executor.layers.rotary_embedding import (
-    DeepseekScalingRotaryEmbedding, MRotaryEmbedding, RotaryEmbedding)
+    DeepseekScalingRotaryEmbedding, RotaryEmbedding)
 
 from tests.ut.base import TestBase
 from vllm_ascend.ascend_forward_context import set_ascend_forward_context
 from vllm_ascend.ops.rotary_embedding import _custom_rotary_embedding_enabled
 
 MODEL = "Qwen3-0.6B"
-MODEL_VL = "Qwen/Qwen2.5-VL-3B-Instruct"
 MAX_NUM_BATCHED_TOKEND = 10000
 
 
@@ -377,86 +376,3 @@ def test_yarn_get_mscale(self, mock_npuplatform): |
                                    expected,
                                    places=6,
                                    msg=f"Failed for scale={scale}, mscale={mscale}")
-
-
-class TestAscendMRotaryEmbedding(unittest.TestCase):
-
-    def setUp(self):
-        # Common setup for tests
-        self.number_tokens = 3
-        self.num_head = 8
-        self.num_kvhead = 8
-        self.head_size = 128
-        self.max_position_embeddings = 128000
-        self.is_neox_style = True
-        self.rope_theta = 1000000.0
-        self.positions_1d = torch.tensor([1, 2, 3])
-        self.positions_2d = torch.randint(1, 10, (3, self.number_tokens))
-
-        self.query = torch.randn(
-            (self.number_tokens, self.num_head * self.head_size),
-            dtype=torch.bfloat16)
-        self.key = torch.randn(
-            (self.number_tokens, self.num_kvhead * self.head_size),
-            dtype=torch.bfloat16)
-
-        # Qwen2.5-VL mrope section case
-        self.mrope_section = [16, 24, 24]
-
-        self.layer = MRotaryEmbedding(self.head_size,
-                                      self.head_size,
-                                      self.max_position_embeddings,
-                                      base=self.rope_theta,
-                                      is_neox_style=self.is_neox_style,
-                                      dtype=torch.bfloat16,
-                                      mrope_section=self.mrope_section)
-
-        self.mock_config = MagicMock()
-        self.mock_config.torchair_graph_config.enabled = False
-
-    def _create_vllm_config(self):
-        vllm_config = VllmConfig()
-        model_config = ModelConfig(MODEL_VL,
-                                   tokenizer=MODEL_VL,
-                                   max_model_len=MAX_NUM_BATCHED_TOKEND)
-        model_config.hf_config = PretrainedConfig()
-        vllm_config.model_config = model_config
-        return vllm_config
-
-    @patch('torch_npu.npu_mrope')
-    @patch('vllm.config.ModelConfig.__post_init__', MagicMock())
-    @patch('vllm.config.VllmConfig.__post_init__', MagicMock())
-    @patch('vllm.distributed.parallel_state._DP', MagicMock(world_size=1))
-    @patch('vllm.distributed.parallel_state._TP', MagicMock(world_size=1))
-    def test_forward_oot_1d_positions(self, mock_npu_mrope):
-        mock_npu_mrope.return_value = (torch.zeros_like(self.query),
-                                       torch.zeros_like(self.key))
-
-        vllm_config = self._create_vllm_config()
-        with set_ascend_forward_context(None, vllm_config):
-            result_q, result_k = self.layer.forward_oot(
-                self.positions_1d, self.query, self.key)
-
-        mock_npu_mrope.assert_called_once()
-        self.assertFalse(torch.isnan(result_q).any().item())
-        self.assertFalse(torch.isnan(result_k).any().item())
-        self.assertEqual(result_q.shape, self.query.shape)
-
-    @patch('torch_npu.npu_mrope')
-    @patch('vllm.config.ModelConfig.__post_init__', MagicMock())
-    @patch('vllm.config.VllmConfig.__post_init__', MagicMock())
-    @patch('vllm.distributed.parallel_state._DP', MagicMock(world_size=1))
-    @patch('vllm.distributed.parallel_state._TP', MagicMock(world_size=1))
-    def test_forward_oot_2d_positions(self, mock_npu_mrope):
-        mock_npu_mrope.return_value = (torch.zeros_like(self.query),
-                                       torch.zeros_like(self.key))
-
-        vllm_config = self._create_vllm_config()
-        with set_ascend_forward_context(None, vllm_config):
-            result_q, result_k = self.layer.forward_oot(
-                self.positions_2d, self.query, self.key)
-
-        mock_npu_mrope.assert_called_once()
-        self.assertFalse(torch.isnan(result_q).any().item())
-        self.assertFalse(torch.isnan(result_k).any().item())
-        self.assertEqual(result_q.shape, self.query.shape)