Skip to content

Commit a904ea7

Browse files
authored
[benchmark] add peak throughput metrics and plot (#23867)
Signed-off-by: simon-mo <[email protected]>
1 parent b7433ca commit a904ea7

File tree

2 files changed

+134
-69
lines changed

2 files changed

+134
-69
lines changed

vllm/benchmarks/lib/endpoint_request_func.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ class RequestFuncOutput:
8989
tpot: float = 0.0 # avg next-token latencies
9090
prompt_len: int = 0
9191
error: str = ""
92+
start_time: float = 0.0
9293

9394

9495
async def async_request_openai_completions(
@@ -140,6 +141,7 @@ async def async_request_openai_completions(
140141

141142
generated_text = ""
142143
st = time.perf_counter()
144+
output.start_time = st
143145
most_recent_timestamp = st
144146
try:
145147
async with session.post(url=api_url, json=payload,
@@ -272,6 +274,7 @@ async def async_request_openai_chat_completions(
272274
generated_text = ""
273275
ttft = 0.0
274276
st = time.perf_counter()
277+
output.start_time = st
275278
most_recent_timestamp = st
276279
try:
277280
async with session.post(url=api_url, json=payload,
@@ -396,6 +399,7 @@ def to_bytes(y, sr):
396399
generated_text = ""
397400
ttft = 0.0
398401
st = time.perf_counter()
402+
output.start_time = st
399403
most_recent_timestamp = st
400404
try:
401405
async with session.post(url=api_url,
@@ -475,6 +479,7 @@ async def async_request_openai_embeddings(
475479

476480
output = RequestFuncOutput()
477481
st = time.perf_counter()
482+
output.start_time = st
478483
try:
479484
async with session.post(
480485
url=api_url,

0 commit comments

Comments (0)