Skip to content

Commit 64c41d1

Browse files
authored
Merge pull request #162 from willccbb/cursor/update-docs-for-pydantic-type-signature-changes-6997
2 parents 4c6f386 + dda1d5f commit 64c41d1

File tree

4 files changed

+91
-29
lines changed

4 files changed

+91
-29
lines changed

docs/source/api_reference.md

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,31 @@ This guide explains the key types and data structures in Verifiers.
44

55
## Core Types
66

7+
### Pydantic Models
8+
9+
Verifiers uses Pydantic models for structured data:
10+
11+
```python
12+
from pydantic import BaseModel
13+
14+
class GenerateInputs(BaseModel):
15+
"""Pydantic model for generation inputs."""
16+
prompt: List[Messages]
17+
answer: Optional[List[str]] = None
18+
info: Optional[List[Dict]] = None
19+
task: Optional[List[str]] = None
20+
completion: Optional[List[Messages]] = None
21+
22+
class ProcessedOutputs(BaseModel):
23+
"""Pydantic model for processed outputs."""
24+
prompt_ids: List[List[int]]
25+
prompt_mask: List[List[int]]
26+
completion_ids: List[List[int]]
27+
completion_mask: List[List[int]]
28+
completion_logprobs: List[List[float]]
29+
rewards: List[float]
30+
```
31+
732
### State Dictionary
833

934
The `State` object tracks rollout information throughout an interaction:
@@ -36,13 +61,17 @@ The `responses` field contains raw API response objects with:
3661
### Message Formats
3762

3863
```python
64+
# Import from verifiers.types
65+
from verifiers.types import ChatMessage, Messages
66+
3967
# Chat format (recommended)
40-
ChatMessage = TypedDict({
68+
# ChatMessage is a dict with these fields:
69+
ChatMessage = {
4170
"role": str, # "system", "user", "assistant", or "tool"
4271
"content": str, # Message text
4372
"tool_calls": List[...], # Optional tool calls
4473
"tool_call_id": str, # Optional tool call ID
45-
})
74+
}
4675

4776
Messages = Union[str, List[ChatMessage]] # Can be string (completion) or chat
4877

@@ -86,15 +115,20 @@ def env_response(
86115
messages: List[ChatMessage],
87116
state: State,
88117
**kwargs
89-
) -> Tuple[Union[str, ChatMessage], State]:
118+
) -> Tuple[Messages, State]:
90119
"""
91120
Returns:
92-
- Response message (string or ChatMessage dict)
121+
- Response messages (List[ChatMessage] or str for completion mode)
93122
- Updated state dictionary
94123
"""
95-
response = "Environment feedback" # or {"role": "user", "content": "..."}
96-
new_state = {**state, "turn": state.get("turn", 0) + 1}
97-
return response, new_state
124+
# Return a list of ChatMessage dicts (typical case)
125+
response = [{"role": "user", "content": "Environment feedback"}]
126+
127+
# Update state as needed
128+
state["turn"] = state.get("turn", 0) + 1
129+
state["last_action"] = "provided feedback"
130+
131+
return response, state
98132
```
99133

100134
### Sampling Arguments

docs/source/components.md

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -343,41 +343,50 @@ def load_environment(**kwargs):
343343
Build a Wordle-like game with multi-turn interaction:
344344

345345
```python
346+
from verifiers.types import Messages, State
347+
from typing import Tuple
348+
346349
class WordleEnv(vf.MultiTurnEnv):
347350
def __init__(self, **kwargs):
348351
super().__init__(**kwargs)
349352
self.max_guesses = 6
350353

351-
def env_response(self, messages, state):
354+
def env_response(self, messages: Messages, state: State) -> Tuple[Messages, State]:
352355
if state.get("turn", 0) == 0:
353356
# First turn: initialize
354-
return "Guess a 5-letter word. You have 6 attempts.", {
355-
"turn": 1,
356-
"target": state["answer"],
357-
"guesses": []
358-
}
357+
state["turn"] = 1
358+
state["target"] = state["answer"]
359+
state["guesses"] = []
360+
return [{"role": "user", "content": "Guess a 5-letter word. You have 6 attempts."}], state
359361

360-
guess = messages[-1]["content"].strip().upper()
362+
# Get the last message; only respond if it came from the assistant
363+
last_msg = messages[-1]
364+
if last_msg["role"] != "assistant":
365+
return [], state # No response if not assistant message
366+
367+
guess = last_msg["content"].strip().upper()
361368
target = state["target"]
362369

363370
# Validate guess
364371
if len(guess) != 5 or not guess.isalpha():
365-
return "Please guess a 5-letter word.", state
372+
return [{"role": "user", "content": "Please guess a 5-letter word."}], state
366373

367374
# Generate feedback
368375
feedback = self.get_feedback(guess, target)
369376
state["guesses"].append(guess)
370377
state["turn"] += 1
371378

372379
if guess == target:
373-
return f"Correct! The word was {target}.", {**state, "solved": True}
380+
state["solved"] = True
381+
return [{"role": "user", "content": f"Correct! The word was {target}."}], state
374382
elif state["turn"] > self.max_guesses:
375-
return f"Out of guesses. The word was {target}.", {**state, "failed": True}
383+
state["failed"] = True
384+
return [{"role": "user", "content": f"Out of guesses. The word was {target}."}], state
376385
else:
377386
remaining = self.max_guesses - state["turn"] + 1
378-
return f"{feedback}\n{remaining} guesses remaining.", state
387+
return [{"role": "user", "content": f"{feedback}\n{remaining} guesses remaining."}], state
379388

380-
def is_completed(self, messages, state):
389+
def is_completed(self, messages: Messages, state: State) -> bool:
381390
return state.get("solved", False) or state.get("failed", False)
382391
```
383392

@@ -491,4 +500,4 @@ def load_math_suite(**kwargs):
491500

492501
- Build your own environments using these components in [Environments](environments.md)
493502
- Train models with your environments in [Training](training.md)
494-
- Understand the type system in [Type Reference](api_reference.md)
503+
- Understand the type system in [Type Reference](api_reference.md)

docs/source/environments.md

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -205,19 +205,32 @@ def load_environment(**kwargs):
205205
For interactive tasks requiring multiple steps:
206206

207207
```python
208+
from verifiers.types import Messages, State
209+
from typing import Tuple
210+
208211
class MyGameEnv(vf.MultiTurnEnv):
209-
def env_response(self, messages, state):
212+
def env_response(self, messages: Messages, state: State) -> Tuple[Messages, State]:
210213
"""Define how the environment responds."""
211-
last_msg = messages[-1]["content"]
214+
# Get the last message; only act on it if it came from the assistant
215+
last_msg = messages[-1]
216+
if last_msg["role"] == "assistant":
217+
player_action = last_msg["content"]
218+
else:
219+
return [], state # No response if not assistant message
212220

221+
# Check game state
213222
if self.is_game_over(state):
214-
return "Game over!", {"done": True}
223+
response = [{"role": "user", "content": "Game over!"}]
224+
state["done"] = True
225+
return response, state
215226

216227
# Update game state
217-
new_state = self.update_state(state, last_msg)
218-
response = self.get_game_feedback(new_state)
228+
state = self.update_state(state, player_action)
229+
feedback = self.get_game_feedback(state)
219230

220-
return response, new_state
231+
# Return list of ChatMessage dicts
232+
response = [{"role": "user", "content": feedback}]
233+
return response, state
221234

222235
def load_environment(**kwargs):
223236
return MyGameEnv(dataset=dataset, **kwargs)

docs/source/overview.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,19 @@ Environment (orchestration layer)
2929
The base class for custom interaction protocols:
3030

3131
```python
32+
from verifiers.types import Messages, State
33+
from typing import Tuple
34+
3235
class MyProtocol(vf.MultiTurnEnv):
33-
def env_response(self, messages, state):
36+
def env_response(self, messages: Messages, state: State) -> Tuple[Messages, State]:
3437
"""Define how environment responds to model"""
3538
# Custom logic for your protocol
36-
return response, new_state
39+
response = [{"role": "user", "content": "Environment feedback"}]
40+
# Update state
41+
state["turn"] = state.get("turn", 0) + 1
42+
return response, state
3743

38-
def is_completed(self, messages, state):
44+
def is_completed(self, messages: Messages, state: State) -> bool:
3945
"""Define when interaction ends"""
4046
return state.get("task_complete", False)
4147
```

0 commit comments

Comments
 (0)