fix(litellm): map LiteLLM context-window errors to ContextWindowOverflowException

Ratish1 · Ratish1 · commit 9b94a1bdb0b7 · 2025-10-07T15:11:33.000+04:00
diff --git a/src/strands/models/litellm.py b/src/strands/models/litellm.py
@@ -13,6 +13,7 @@
 from typing_extensions import Unpack, override
 
 from ..types.content import ContentBlock, Messages
+from ..types.exceptions import ContextWindowOverflowException
 from ..types.streaming import StreamEvent
 from ..types.tools import ToolChoice, ToolSpec
 from ._validation import validate_config_keys
@@ -22,6 +23,15 @@
 
 T = TypeVar("T", bound=BaseModel)
 
+LITELLM_CONTEXT_WINDOW_OVERFLOW_MESSAGES = [  # case-sensitive substrings looked for in str(exception)
+    "Context Window Error",
+    "Context Window Exceeded",
+    "ContextWindowExceeded",
+    "Context window exceeded",
+    "Input is too long",
+    "ContextWindowExceededError",  # NOTE(review): already matched by "ContextWindowExceeded" above
+]
+
 
 class LiteLLMModel(OpenAIModel):
     """LiteLLM model provider implementation."""
@@ -135,7 +145,25 @@ async def stream(
         logger.debug("request=<%s>", request)
 
         logger.debug("invoking model")
-        response = await litellm.acompletion(**self.client_args, **request)
+        try:
+            response = await litellm.acompletion(**self.client_args, **request)
+        except Exception as e:
+            # Prefer litellm-specific typed exception if exposed
+            litellm_exc_type = getattr(litellm, "ContextWindowExceededError", None) or getattr(
+                litellm, "ContextWindowExceeded", None
+            )
+            if litellm_exc_type and isinstance(e, litellm_exc_type):
+                logger.warning("litellm client raised context window overflow")
+                raise ContextWindowOverflowException(e) from e
+
+            # Fallback to substring checks similar to Bedrock handling
+            error_message = str(e)
+            if any(substr in error_message for substr in LITELLM_CONTEXT_WINDOW_OVERFLOW_MESSAGES):
+                logger.warning("litellm threw context window overflow error")
+                raise ContextWindowOverflowException(e) from e
+
+            # Not a context-window error — re-raise original
+            raise
 
         logger.debug("got response from model")
         yield self.format_chunk({"chunk_type": "message_start"})
@@ -205,15 +233,37 @@ async def structured_output(
         Yields:
             Model events with the last being the structured output.
         """
-        if not supports_response_schema(self.get_config()["model_id"]):
+        supports_schema = supports_response_schema(self.get_config()["model_id"])
+
+        # If the provider does not support response schemas, we cannot reliably parse structured output.
+        # In that case we must not call the provider and must raise the documented ValueError.
+        if not supports_schema:
             raise ValueError("Model does not support response_format")
 
-        response = await litellm.acompletion(
-            **self.client_args,
-            model=self.get_config()["model_id"],
-            messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
-            response_format=output_model,
-        )
+        # For providers that DO support response schemas, call litellm and map context-window errors.
+        try:
+            response = await litellm.acompletion(
+                **self.client_args,
+                model=self.get_config()["model_id"],
+                messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
+                response_format=output_model,
+            )
+        except Exception as e:
+            # Prefer litellm-specific typed exception if exposed
+            litellm_exc_type = getattr(litellm, "ContextWindowExceededError", None) or getattr(
+                litellm, "ContextWindowExceeded", None
+            )
+            if litellm_exc_type and isinstance(e, litellm_exc_type):
+                logger.warning("litellm client raised context window overflow in structured_output")
+                raise ContextWindowOverflowException(e) from e
+
+            error_message = str(e)
+            if any(substr in error_message for substr in LITELLM_CONTEXT_WINDOW_OVERFLOW_MESSAGES):
+                logger.warning("litellm threw context window overflow error in structured_output")
+                raise ContextWindowOverflowException(e) from e
+
+            # Not a context-window error — re-raise original
+            raise
 
         if len(response.choices) > 1:
             raise ValueError("Multiple choices found in the response.")
diff --git a/tests/strands/models/test_litellm.py b/tests/strands/models/test_litellm.py
@@ -6,6 +6,7 @@
 
 import strands
 from strands.models.litellm import LiteLLMModel
+from strands.types.exceptions import ContextWindowOverflowException
 
 
 @pytest.fixture
@@ -301,6 +302,32 @@ async def test_structured_output_unsupported_model(litellm_acompletion, model, t
     litellm_acompletion.assert_not_called()
 
 
+@pytest.mark.asyncio
+async def test_stream_context_window_maps_to_exception(litellm_acompletion, model):
+    """stream() turns a context-window error from the litellm client into ContextWindowOverflowException."""
+    messages = [{"role": "user", "content": [{"text": "x"}]}]
+    litellm_acompletion.side_effect = Exception("Input is too long for requested model")
+
+    with pytest.raises(ContextWindowOverflowException):
+        _ = [event async for event in model.stream(messages)]
+
+
+@pytest.mark.asyncio
+async def test_structured_output_context_window_maps_to_exception(litellm_acompletion, model, test_output_model_cls):
+    """structured_output maps a context-window error message to ContextWindowOverflowException."""
+    litellm_acompletion.side_effect = Exception("Context Window Error - Input too long")
+    prompt = [{"role": "user", "content": [{"text": "x"}]}]
+    # Force the schema-support check to pass so structured_output reaches litellm.acompletion.
+    schema_supported = unittest.mock.patch.object(
+        strands.models.litellm, "supports_response_schema", return_value=True
+    )
+
+    # The call only runs once the async generator is consumed, so the error surfaces during iteration.
+    with schema_supported, pytest.raises(ContextWindowOverflowException):
+        async for _ in model.structured_output(output_model=test_output_model_cls, prompt=prompt):
+            pass
+
+
 def test_config_validation_warns_on_unknown_keys(litellm_acompletion, captured_warnings):
     """Test that unknown config keys emit a warning."""
     LiteLLMModel(client_args={"api_key": "test"}, model_id="test-model", invalid_param="test")