Merge pull request #162 from willccbb/cursor/update-docs-for-pydantic-type-signature-changes-6997

willccbb · web-flow · commit 64c41d105818 · 2025-07-27T18:13:36.000+02:00
diff --git a/docs/source/api_reference.md b/docs/source/api_reference.md
@@ -4,6 +4,31 @@ This guide explains the key types and data structures in Verifiers.
 
 ## Core Types
 
+### Pydantic Models
+
+Verifiers uses Pydantic models for structured data:
+
+```python
+from pydantic import BaseModel
+
+class GenerateInputs(BaseModel):
+    """Pydantic model for generation inputs."""
+    prompt: List[Messages]
+    answer: Optional[List[str]] = None
+    info: Optional[List[Dict]] = None
+    task: Optional[List[str]] = None
+    completion: Optional[List[Messages]] = None
+
+class ProcessedOutputs(BaseModel):
+    """Pydantic model for processed outputs."""
+    prompt_ids: List[List[int]]
+    prompt_mask: List[List[int]]
+    completion_ids: List[List[int]]
+    completion_mask: List[List[int]]
+    completion_logprobs: List[List[float]]
+    rewards: List[float]
+```
+
 ### State Dictionary
 
 The `State` object tracks rollout information throughout an interaction:
@@ -36,13 +61,17 @@ The `responses` field contains raw API response objects with:
 ### Message Formats
 
 ```python
+# Import from verifiers.types
+from verifiers.types import ChatMessage, Messages
+
 # Chat format (recommended)
-ChatMessage = TypedDict({
+# ChatMessage is a dict with these fields:
+ChatMessage = {
     "role": str,                    # "system", "user", or "assistant"
     "content": str,                 # Message text
     "tool_calls": List[...],        # Optional tool calls
     "tool_call_id": str,            # Optional tool call ID
-})
+}
 
 Messages = Union[str, List[ChatMessage]]  # Can be string (completion) or chat
 
@@ -86,15 +115,20 @@ def env_response(
     messages: List[ChatMessage],
     state: State,
     **kwargs
-) -> Tuple[Union[str, ChatMessage], State]:
+) -> Tuple[Messages, State]:
     """
     Returns:
-        - Response message (string or ChatMessage dict)
+        - Response messages (List[ChatMessage] in chat mode, or str in completion mode)
         - Updated state dictionary
     """
-    response = "Environment feedback"  # or {"role": "user", "content": "..."}
-    new_state = {**state, "turn": state.get("turn", 0) + 1}
-    return response, new_state
+    # Return a list of ChatMessage dicts (typical case)
+    response = [{"role": "user", "content": "Environment feedback"}]
+    
+    # Update state as needed
+    state["turn"] = state.get("turn", 0) + 1
+    state["last_action"] = "provided feedback"
+    
+    return response, state
 ```
 
 ### Sampling Arguments
diff --git a/docs/source/components.md b/docs/source/components.md
@@ -343,41 +343,50 @@ def load_environment(**kwargs):
 Build a Wordle-like game with multi-turn interaction:
 
 ```python
+from verifiers.types import Messages, State
+from typing import Tuple
+
 class WordleEnv(vf.MultiTurnEnv):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.max_guesses = 6
     
-    def env_response(self, messages, state):
+    def env_response(self, messages: Messages, state: State) -> Tuple[Messages, State]:
         if state.get("turn", 0) == 0:
             # First turn: initialize
-            return "Guess a 5-letter word. You have 6 attempts.", {
-                "turn": 1,
-                "target": state["answer"],
-                "guesses": []
-            }
+            state["turn"] = 1
+            state["target"] = state["answer"]
+            state["guesses"] = []
+            return [{"role": "user", "content": "Guess a 5-letter word. You have 6 attempts."}], state
         
-        guess = messages[-1]["content"].strip().upper()
+        # Get the last message; it should come from the assistant
+        last_msg = messages[-1]
+        if last_msg["role"] != "assistant":
+            return [], state  # No response if the last message is not from the assistant
+            
+        guess = last_msg["content"].strip().upper()
         target = state["target"]
         
         # Validate guess
         if len(guess) != 5 or not guess.isalpha():
-            return "Please guess a 5-letter word.", state
+            return [{"role": "user", "content": "Please guess a 5-letter word."}], state
         
         # Generate feedback
         feedback = self.get_feedback(guess, target)
         state["guesses"].append(guess)
         state["turn"] += 1
         
         if guess == target:
-            return f"Correct! The word was {target}.", {**state, "solved": True}
+            state["solved"] = True
+            return [{"role": "user", "content": f"Correct! The word was {target}."}], state
         elif state["turn"] > self.max_guesses:
-            return f"Out of guesses. The word was {target}.", {**state, "failed": True}
+            state["failed"] = True
+            return [{"role": "user", "content": f"Out of guesses. The word was {target}."}], state
         else:
             remaining = self.max_guesses - state["turn"] + 1
-            return f"{feedback}\n{remaining} guesses remaining.", state
+            return [{"role": "user", "content": f"{feedback}\n{remaining} guesses remaining."}], state
     
-    def is_completed(self, messages, state):
+    def is_completed(self, messages: Messages, state: State) -> bool:
         return state.get("solved", False) or state.get("failed", False)
 ```
 
@@ -491,4 +500,4 @@ def load_math_suite(**kwargs):
 
 - Build your own environments using these components in [Environments](environments.md)
 - Train models with your environments in [Training](training.md)
-- Understand the type system in [Type Reference](api_reference.md) 
+- Understand the type system in [Type Reference](api_reference.md)
diff --git a/docs/source/environments.md b/docs/source/environments.md
@@ -205,19 +205,32 @@ def load_environment(**kwargs):
 For interactive tasks requiring multiple steps:
 
 ```python
+from verifiers.types import Messages, State
+from typing import Tuple
+
 class MyGameEnv(vf.MultiTurnEnv):
-    def env_response(self, messages, state):
+    def env_response(self, messages: Messages, state: State) -> Tuple[Messages, State]:
         """Define how the environment responds."""
-        last_msg = messages[-1]["content"]
+        # Get the last message and check that it came from the assistant
+        last_msg = messages[-1]
+        if last_msg["role"] == "assistant":
+            player_action = last_msg["content"]
+        else:
+            return [], state  # No response if the last message is not from the assistant
         
+        # Check game state
         if self.is_game_over(state):
-            return "Game over!", {"done": True}
+            response = [{"role": "user", "content": "Game over!"}]
+            state["done"] = True
+            return response, state
         
         # Update game state
-        new_state = self.update_state(state, last_msg)
-        response = self.get_game_feedback(new_state)
+        state = self.update_state(state, player_action)
+        feedback = self.get_game_feedback(state)
         
-        return response, new_state
+        # Return list of ChatMessage dicts
+        response = [{"role": "user", "content": feedback}]
+        return response, state
 
 def load_environment(**kwargs):
     return MyGameEnv(dataset=dataset, **kwargs)
diff --git a/docs/source/overview.md b/docs/source/overview.md
@@ -29,13 +29,19 @@ Environment (orchestration layer)
 The base class for custom interaction protocols:
 
 ```python
+from verifiers.types import Messages, State
+from typing import Tuple
+
 class MyProtocol(vf.MultiTurnEnv):
-    def env_response(self, messages, state):
+    def env_response(self, messages: Messages, state: State) -> Tuple[Messages, State]:
         """Define how environment responds to model"""
         # Custom logic for your protocol
-        return response, new_state
+        response = [{"role": "user", "content": "Environment feedback"}]
+        # Update state
+        state["turn"] = state.get("turn", 0) + 1
+        return response, state
     
-    def is_completed(self, messages, state):
+    def is_completed(self, messages: Messages, state: State) -> bool:
         """Define when interaction ends"""
         return state.get("task_complete", False)
 ```