Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions tests/v1/entrypoints/openai/responses/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import openai # use the official client for correctness check
import openai.types.responses as openai_responses_types
import pytest


Expand Down Expand Up @@ -86,3 +87,18 @@ async def test_logprobs(client: openai.AsyncOpenAI):
outputs = response.output
assert outputs[-1].content[-1].logprobs
assert len(outputs[-1].content[-1].logprobs[0].top_logprobs) == 5


@pytest.mark.asyncio
async def test_streaming(client: openai.AsyncOpenAI):
    """Stream a Responses API request and check the order of event types.

    The stream must open with a ``ResponseCreatedEvent``, contain at least
    one ``ResponseTextDeltaEvent``, and close with a
    ``ResponseCompletedEvent``.
    """
    stream = await client.responses.create(
        input="What is 13 * 24?",
        stream=True,
    )
    # The async client hands back an async stream: it must be consumed with
    # ``async for``. A plain ``for`` raises TypeError before any event is read.
    events = [event async for event in stream]
    # Fail with a readable message rather than an IndexError on events[0].
    assert events, "stream produced no events"
    assert isinstance(events[0], openai_responses_types.ResponseCreatedEvent)
    assert any(
        isinstance(event, openai_responses_types.ResponseTextDeltaEvent)
        for event in events)
    assert isinstance(events[-1],
                      openai_responses_types.ResponseCompletedEvent)
8 changes: 8 additions & 0 deletions vllm/entrypoints/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,17 @@ class SimpleContext(ConversationContext):

def __init__(self):
    """Start with no recorded output and every token counter at zero."""
    # Latest value handed to append_output; None until the first call.
    self.last_output = None
    # Token tallies, all starting from zero.
    self.num_prompt_tokens = self.num_output_tokens = \
        self.num_cached_tokens = 0

def append_output(self, output) -> None:
    """Record ``output`` as the latest result and refresh the token counters.

    Args:
        output: The result to record; must be a ``RequestOutput``.

    Raises:
        ValueError: If ``output`` is not a ``RequestOutput``. The context is
            left unmodified in that case.
    """
    # Validate before touching any state, so a rejected value cannot
    # overwrite ``last_output``.
    if not isinstance(output, RequestOutput):
        raise ValueError("SimpleContext only supports RequestOutput.")
    self.last_output = output
    # Prompt and cached counts are replaced by the values on this output;
    # a missing (None) field counts as zero.
    self.num_prompt_tokens = len(output.prompt_token_ids or [])
    self.num_cached_tokens = output.num_cached_tokens or 0
    # Output tokens accumulate across calls, using the first completion only.
    # NOTE(review): assumes each call carries only newly generated tokens;
    # cumulative outputs would be double-counted -- confirm against callers.
    self.num_output_tokens += len(output.outputs[0].token_ids or [])

def need_builtin_tool_call(self) -> bool:
    """Report whether a built-in tool call is needed; always ``False`` here."""
    return False
Expand Down
Loading