Skip to content

Commit b65638e

Browse files
committed
[Feature][Response API] Add streaming support for non-harmony
Signed-off-by: Kebe <[email protected]>
1 parent e039407 commit b65638e

File tree

2 files changed

+377
-70
lines changed

2 files changed

+377
-70
lines changed

vllm/entrypoints/context.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,17 @@ class SimpleContext(ConversationContext):

     def __init__(self):
         self.last_output = None
+        self.num_prompt_tokens = 0
+        self.num_output_tokens = 0
+        self.num_cached_tokens = 0

     def append_output(self, output) -> None:
         self.last_output = output
+        if not isinstance(output, RequestOutput):
+            raise ValueError("SimpleContext only supports RequestOutput.")
+        self.num_prompt_tokens = len(output.prompt_token_ids)
+        self.num_cached_tokens = output.num_cached_tokens
+        self.num_output_tokens += len(output.outputs[0].token_ids)

     def need_builtin_tool_call(self) -> bool:
         return False

0 commit comments

Comments
 (0)