Skip to content

Commit a904ea7

Browse files
authored
[benchmark] add peak throughput metrics and plot (#23867)
Signed-off-by: simon-mo <[email protected]>
1 parent b7433ca commit a904ea7

File tree

2 files changed

+134
-69
lines changed

2 files changed

+134
-69
lines changed

vllm/benchmarks/lib/endpoint_request_func.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ class RequestFuncOutput:
8989
tpot: float = 0.0 # avg next-token latencies
9090
prompt_len: int = 0
9191
error: str = ""
92+
start_time: float = 0.0
9293

9394

9495
async def async_request_openai_completions(
@@ -140,6 +141,7 @@ async def async_request_openai_completions(
140141

141142
generated_text = ""
142143
st = time.perf_counter()
144+
output.start_time = st
143145
most_recent_timestamp = st
144146
try:
145147
async with session.post(url=api_url, json=payload,
@@ -272,6 +274,7 @@ async def async_request_openai_chat_completions(
272274
generated_text = ""
273275
ttft = 0.0
274276
st = time.perf_counter()
277+
output.start_time = st
275278
most_recent_timestamp = st
276279
try:
277280
async with session.post(url=api_url, json=payload,
@@ -396,6 +399,7 @@ def to_bytes(y, sr):
396399
generated_text = ""
397400
ttft = 0.0
398401
st = time.perf_counter()
402+
output.start_time = st
399403
most_recent_timestamp = st
400404
try:
401405
async with session.post(url=api_url,
@@ -475,6 +479,7 @@ async def async_request_openai_embeddings(
475479

476480
output = RequestFuncOutput()
477481
st = time.perf_counter()
482+
output.start_time = st
478483
try:
479484
async with session.post(
480485
url=api_url,

0 commit comments

Comments (0)