|
552 | 552 | "random_input_len": 5250, |
553 | 553 | "random_output_len": 8250 |
554 | 554 | } |
| 555 | + }, |
| 556 | + { |
| 557 | + "test_name": "serving_gemma3_12b_it_fp8_torchao", |
| 558 | + "qps_list": [1, 4, 16, "inf"], |
| 559 | + "server_parameters": { |
| 560 | + "model": "pytorch/gemma-3-12b-it-FP8", |
| 561 | + "tokenizer": "google/gemma-3-12b-it", |
| 562 | + "quantization": "torchao", |
| 563 | + "tensor_parallel_size": 1, |
| 564 | + "swap_space": 16, |
| 565 | + "disable_log_stats": "", |
| 566 | + "disable_log_requests": "", |
| 567 | + "load_format": "auto" |
| 568 | + }, |
| 569 | + "client_parameters": { |
| 570 | + "model": "pytorch/gemma-3-12b-it-FP8", |
| 571 | + "backend": "vllm", |
| 572 | + "dataset_name": "random", |
| 573 | + "num_prompts": 200, |
| 574 | + "random_input_len": 1024, |
| 575 | + "random_output_len": 2048 |
| 576 | + } |
| 577 | + }, |
| 578 | + { |
| 579 | + "test_name": "serving_gemma3_12b_it_int4_torchao", |
| 580 | + "qps_list": [1, 4, 16, "inf"], |
| 581 | + "server_parameters": { |
| 582 | + "model": "pytorch/gemma-3-12b-it-INT4", |
| 583 | + "tokenizer": "google/gemma-3-12b-it", |
| 584 | + "quantization": "torchao", |
| 585 | + "tensor_parallel_size": 1, |
| 586 | + "swap_space": 16, |
| 587 | + "disable_log_stats": "", |
| 588 | + "disable_log_requests": "", |
| 589 | + "load_format": "auto" |
| 590 | + }, |
| 591 | + "client_parameters": { |
| 592 | + "model": "pytorch/gemma-3-12b-it-INT4", |
| 593 | + "backend": "vllm", |
| 594 | + "dataset_name": "random", |
| 595 | + "num_prompts": 200, |
| 596 | + "random_input_len": 1024, |
| 597 | + "random_output_len": 2048 |
| 598 | + } |
| 599 | + }, |
| 600 | + { |
| 601 | + "test_name": "serving_gemma3_27b_it_fp8_torchao", |
| 602 | + "qps_list": [1, 4, 16, "inf"], |
| 603 | + "server_parameters": { |
| 604 | + "model": "pytorch/gemma-3-27b-it-FP8", |
| 605 | + "tokenizer": "google/gemma-3-27b-it", |
| 606 | + "quantization": "torchao", |
| 607 | + "tensor_parallel_size": 1, |
| 608 | + "swap_space": 16, |
| 609 | + "disable_log_stats": "", |
| 610 | + "disable_log_requests": "", |
| 611 | + "load_format": "auto" |
| 612 | + }, |
| 613 | + "client_parameters": { |
| 614 | + "model": "pytorch/gemma-3-27b-it-FP8", |
| 615 | + "backend": "vllm", |
| 616 | + "dataset_name": "random", |
| 617 | + "num_prompts": 200, |
| 618 | + "random_input_len": 1024, |
| 619 | + "random_output_len": 2048 |
| 620 | + } |
| 621 | + }, |
| 622 | + { |
| 623 | + "test_name": "serving_gemma3_27b_it_int4_torchao", |
| 624 | + "qps_list": [1, 4, 16, "inf"], |
| 625 | + "server_parameters": { |
| 626 | + "model": "pytorch/gemma-3-27b-it-INT4", |
| 627 | + "tokenizer": "google/gemma-3-27b-it", |
| 628 | + "quantization": "torchao", |
| 629 | + "tensor_parallel_size": 1, |
| 630 | + "swap_space": 16, |
| 631 | + "disable_log_stats": "", |
| 632 | + "disable_log_requests": "", |
| 633 | + "load_format": "auto" |
| 634 | + }, |
| 635 | + "client_parameters": { |
| 636 | + "model": "pytorch/gemma-3-27b-it-INT4", |
| 637 | + "backend": "vllm", |
| 638 | + "dataset_name": "random", |
| 639 | + "num_prompts": 200, |
| 640 | + "random_input_len": 1024, |
| 641 | + "random_output_len": 2048 |
| 642 | + } |
555 | 643 | } |
556 | 644 | ] |
0 commit comments