19 changes: 19 additions & 0 deletions llama-index-core/llama_index/core/base/llms/types.py
@@ -425,6 +425,24 @@ def validate_cited_content(cls, v: Any) -> Any:
        return v


class ThinkingBlock(BaseModel):
    """A representation of the content streamed from an LLM's reasoning/thinking process."""

    block_type: Literal["thinking"] = "thinking"
    content: Optional[str] = Field(
        description="Content of the reasoning/thinking process, if available",
        default=None,
    )
    num_tokens: Optional[int] = Field(
        description="Number of tokens used for reasoning/thinking, if available",
        default=None,
    )
    additional_information: Dict[str, Any] = Field(
        description="Additional information related to the thinking/reasoning process, if available",
        default_factory=dict,
    )


ContentBlock = Annotated[
    Union[
        TextBlock,
@@ -435,6 +453,7 @@ def validate_cited_content(cls, v: Any) -> Any:
        CachePoint,
        CitableBlock,
        CitationBlock,
        ThinkingBlock,
    ],
    Field(discriminator="block_type"),
]
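For orientation (not part of the diff): a minimal sketch of how the new block composes with ChatMessage, assuming the usual Pydantic-bridge round-trip behavior of llama-index core.

from llama_index.core.base.llms.types import ChatMessage, TextBlock, ThinkingBlock

# Interleave a thinking trace with visible assistant text.
msg = ChatMessage(
    role="assistant",
    blocks=[
        ThinkingBlock(content="Factor the quadratic first.", num_tokens=12),
        TextBlock(text="The roots follow from the quadratic formula."),
    ],
)

# block_type is the discriminator, so serialization round-trips cleanly.
restored = ChatMessage.model_validate(msg.model_dump())
assert isinstance(restored.blocks[0], ThinkingBlock)
assert restored.blocks[0].num_tokens == 12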
2 changes: 2 additions & 0 deletions llama-index-core/llama_index/core/memory/memory.py
@@ -28,6 +28,7 @@
    CachePoint,
    CitableBlock,
    CitationBlock,
    ThinkingBlock,
)
from llama_index.core.bridge.pydantic import (
    BaseModel,
@@ -343,6 +344,7 @@ def _estimate_token_count(
                DocumentBlock,
                CitableBlock,
                CitationBlock,
                ThinkingBlock,
            ]
        ] = []
18 changes: 18 additions & 0 deletions llama-index-core/tests/base/llms/test_types.py
@@ -17,6 +17,7 @@
    AudioBlock,
    CachePoint,
    CacheControl,
    ThinkingBlock,
)
from llama_index.core.bridge.pydantic import BaseModel
from llama_index.core.bridge.pydantic import ValidationError
@@ -455,3 +456,20 @@ def test_video_block_store_as_base64(mp4_bytes: bytes, mp4_base64: bytes):
    assert VideoBlock(video=mp4_bytes).video == mp4_base64
    # Store already encoded data
    assert VideoBlock(video=mp4_base64).video == mp4_base64


def test_thinking_block():
    block = ThinkingBlock()
    assert block.block_type == "thinking"
    assert block.additional_information == {}
    assert block.content is None
    assert block.num_tokens is None
    block = ThinkingBlock(
        content="hello world",
        num_tokens=100,
        additional_information={"total_thinking_tokens": 1000},
    )
    assert block.block_type == "thinking"
    assert block.additional_information == {"total_thinking_tokens": 1000}
    assert block.content == "hello world"
    assert block.num_tokens == 100
llama-index-llms-anthropic: base module (file header not captured in this view)
@@ -25,6 +25,7 @@
)
from llama_index.core.base.llms.types import TextBlock as LITextBlock
from llama_index.core.base.llms.types import CitationBlock as LICitationBlock
from llama_index.core.base.llms.types import ThinkingBlock as LIThinkingBlock
from llama_index.core.bridge.pydantic import Field, PrivateAttr
from llama_index.core.callbacks import CallbackManager
from llama_index.core.constants import DEFAULT_TEMPERATURE
@@ -204,6 +205,9 @@ def __init__(
    ) -> None:
        additional_kwargs = additional_kwargs or {}
        callback_manager = callback_manager or CallbackManager([])
        # set the temperature to 1 when thinking is enabled, as per: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
        if thinking_dict and thinking_dict.get("type") == "enabled":
            temperature = 1

        super().__init__(
            temperature=temperature,
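A hedged sketch of the constructor behavior this hunk introduces (the model name and argument values are borrowed from the test further down; the override itself is what the hunk adds):

from llama_index.llms.anthropic import Anthropic

# Extended thinking requires temperature == 1 per the Anthropic docs linked
# above, so the constructor overrides whatever the caller passed.
llm = Anthropic(
    model="claude-sonnet-4-0",
    temperature=0.2,  # silently replaced with 1 because thinking is enabled
    max_tokens=64000,
    thinking_dict={"type": "enabled", "budget_tokens": 1600},
)
assert llm.temperature == 1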
@@ -340,11 +344,8 @@ def _completion_response_from_chat_response(

    def _get_blocks_and_tool_calls_and_thinking(
        self, response: Any
-    ) -> Tuple[
-        List[ContentBlock], List[Dict[str, Any]], Dict[str, Any], List[Dict[str, Any]]
-    ]:
+    ) -> Tuple[List[ContentBlock], List[Dict[str, Any]], List[Dict[str, Any]]]:
        tool_calls = []
-        thinking = None
        blocks: List[ContentBlock] = []
        citations: List[TextCitation] = []
        tracked_citations: Set[str] = set()
@@ -375,11 +376,18 @@ def _get_blocks_and_tool_calls_and_thinking(
                    citations.extend(content_block.citations)
            # this assumes a single thinking block, which as of 2025-03-06, is always true
            elif isinstance(content_block, ThinkingBlock):
-                thinking = content_block.model_dump()
+                blocks.append(
+                    LIThinkingBlock(
+                        content=content_block.thinking,
+                        additional_information=content_block.model_dump(
+                            exclude={"thinking"}
+                        ),
+                    )
+                )
            elif isinstance(content_block, ToolUseBlock):
                tool_calls.append(content_block.model_dump())

-        return blocks, tool_calls, thinking, [x.model_dump() for x in citations]
+        return blocks, tool_calls, [x.model_dump() for x in citations]

    @llm_chat_callback()
    def chat(
@@ -397,8 +405,8 @@ def chat(
            **all_kwargs,
        )

-        blocks, tool_calls, thinking, citations = (
-            self._get_blocks_and_tool_calls_and_thinking(response)
+        blocks, tool_calls, citations = self._get_blocks_and_tool_calls_and_thinking(
+            response
        )

        return AnthropicChatResponse(
@@ -407,7 +415,6 @@
                blocks=blocks,
                additional_kwargs={
                    "tool_calls": tool_calls,
-                    "thinking": thinking,
                },
            ),
            citations=citations,
Expand Down Expand Up @@ -570,8 +577,8 @@ async def achat(
**all_kwargs,
)

blocks, tool_calls, thinking, citations = (
self._get_blocks_and_tool_calls_and_thinking(response)
blocks, tool_calls, citations = self._get_blocks_and_tool_calls_and_thinking(
response
)

return AnthropicChatResponse(
@@ -580,7 +587,6 @@
                blocks=blocks,
                additional_kwargs={
                    "tool_calls": tool_calls,
-                    "thinking": thinking,
                },
            ),
            citations=citations,
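Since thinking now travels in message.blocks rather than additional_kwargs["thinking"], downstream code reads it roughly like this (a sketch mirroring the test below; resp stands for any AnthropicChatResponse):

from llama_index.core.base.llms.types import ThinkingBlock

def thinking_text(resp) -> str:
    # Concatenate the content of every ThinkingBlock in the response message.
    return "".join(
        block.content or ""
        for block in resp.message.blocks
        if isinstance(block, ThinkingBlock)
    )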
llama-index-llms-anthropic: utils module (file header not captured in this view)
@@ -14,6 +14,7 @@
    CachePoint,
    CitableBlock,
    CitationBlock,
    ThinkingBlock,
    ContentBlock,
)

@@ -26,7 +27,7 @@
    CacheControlEphemeralParam,
    Base64PDFSourceParam,
)
-from anthropic.types import ContentBlock as AnthropicContentBlock
+from anthropic.types import ContentBlockParam as AnthropicContentBlock
from anthropic.types.beta import (
    BetaSearchResultBlockParam,
    BetaTextBlockParam,
@@ -201,9 +202,6 @@ def blocks_to_anthropic_blocks(
    if kwargs.get("cache_control"):
        global_cache_control = CacheControlEphemeralParam(**kwargs["cache_control"])

-    if kwargs.get("thinking"):
-        anthropic_blocks.append(ThinkingBlockParam(**kwargs["thinking"]))
-
    for block in blocks:
        if isinstance(block, TextBlock):
            if block.text:
@@ -251,6 +249,17 @@
                if global_cache_control:
                    anthropic_blocks[-1]["cache_control"] = global_cache_control

        elif isinstance(block, ThinkingBlock):
            if block.content:
                signature = block.additional_information.get("signature", "")
                anthropic_blocks.append(
                    ThinkingBlockParam(
                        signature=signature, thinking=block.content, type="thinking"
                    )
                )
                if global_cache_control:
                    anthropic_blocks[-1]["cache_control"] = global_cache_control

        elif isinstance(block, CachePoint):
            if len(anthropic_blocks) > 0:
                anthropic_blocks[-1]["cache_control"] = CacheControlEphemeralParam(
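A sketch of the round trip this hunk enables, assuming messages_to_anthropic_messages lives in llama_index.llms.anthropic.utils (the tests below call it) and that a provider signature, when present, rides along in additional_information; the "sig-abc" value is purely illustrative:

from llama_index.core.base.llms.types import ChatMessage, ThinkingBlock
from llama_index.llms.anthropic.utils import messages_to_anthropic_messages

msg = ChatMessage(
    role="assistant",
    blocks=[
        ThinkingBlock(
            content="Consider the discriminant first.",
            additional_information={"signature": "sig-abc"},  # hypothetical
        )
    ],
)
ant_messages, _ = messages_to_anthropic_messages([msg])
thinking_param = ant_messages[0]["content"][0]
assert thinking_param["type"] == "thinking"
assert thinking_param["thinking"] == "Consider the discriminant first."
assert thinking_param["signature"] == "sig-abc"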
llama-index-llms-anthropic: pyproject.toml (file header not captured in this view)
@@ -27,7 +27,7 @@ dev = [

[project]
name = "llama-index-llms-anthropic"
-version = "0.8.6"
+version = "0.9.0"
description = "llama-index llms anthropic integration"
authors = [{name = "Your Name", email = "[email protected]"}]
requires-python = ">=3.9,<4.0"
llama-index-llms-anthropic: tests (file header not captured in this view)
@@ -17,6 +17,7 @@
    CachePoint,
    CacheControl,
)
from llama_index.core.base.llms.types import ThinkingBlock
from llama_index.core.tools import FunctionTool
from llama_index.llms.anthropic import Anthropic
from llama_index.llms.anthropic.base import AnthropicChatResponse
@@ -384,6 +385,59 @@ def test_cache_point_to_cache_control() -> None:
    assert ant_messages[0]["content"][-1]["cache_control"]["ttl"] == "5m"


def test_thinking_input():
    messages = [
        ChatMessage(
            role="assistant",
            blocks=[
                ThinkingBlock(content="Hello"),
                TextBlock(text="World"),
            ],
        ),
    ]
    ant_messages, _ = messages_to_anthropic_messages(messages)
    assert ant_messages[0]["role"] == "assistant"
    assert ant_messages[0]["content"][0]["type"] == "thinking"
    assert ant_messages[0]["content"][0]["thinking"] == "Hello"
    assert ant_messages[0]["content"][1]["type"] == "text"
    assert ant_messages[0]["content"][1]["text"] == "World"


@pytest.mark.skipif(
    os.getenv("ANTHROPIC_API_KEY") is None,
    reason="Anthropic API key not available to test Anthropic thinking",
)
def test_thinking():
    llm = Anthropic(
        model="claude-sonnet-4-0",
        # max_tokens must be greater than budget_tokens
        max_tokens=64000,
        # temperature must be 1.0 for thinking to work
        temperature=1.0,
        thinking_dict={"type": "enabled", "budget_tokens": 1600},
    )
    res = llm.chat(
        messages=[
            ChatMessage(
                content="Please solve the following equation for x: x^2+12x+7=0. Please think before providing a response."
            )
        ]
    )
    assert any(isinstance(block, ThinkingBlock) for block in res.message.blocks)
    assert (
        len(
            "".join(
                [
                    block.content or ""
                    for block in res.message.blocks
                    if isinstance(block, ThinkingBlock)
                ]
            )
        )
        > 0
    )


@pytest.mark.skipif(
    os.getenv("ANTHROPIC_API_KEY") is None,
    reason="Anthropic API key not available to test Anthropic document uploading",
)
(another LLM integration's streaming base module; file header not captured in this view)
@@ -36,6 +36,7 @@
    CompletionResponseGen,
    LLMMetadata,
    MessageRole,
    ThinkingBlock,
)
from llama_index.core.bridge.pydantic import BaseModel, Field, PrivateAttr
from llama_index.core.callbacks import CallbackManager
@@ -393,7 +394,7 @@ def gen() -> ChatResponseGen:
                    )
                    llama_resp.delta = content_delta
                    llama_resp.message.content = content
-                    llama_resp.message.additional_kwargs["thoughts"] = thoughts
+                    llama_resp.message.blocks.append(ThinkingBlock(content=thoughts))
                    llama_resp.message.additional_kwargs["tool_calls"] = existing_tool_calls
                    yield llama_resp

@@ -453,7 +454,9 @@ async def gen() -> ChatResponseAsyncGen:
                    )
                    llama_resp.delta = content_delta
                    llama_resp.message.content = content
-                    llama_resp.message.additional_kwargs["thoughts"] = thoughts
+                    llama_resp.message.blocks.append(
+                        ThinkingBlock(content=thoughts)
+                    )
                    llama_resp.message.additional_kwargs["tool_calls"] = (
                        existing_tool_calls
                    )
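A sketch of consuming the new streaming behavior, assuming llm is an instance of an integration that appends the accumulated ThinkingBlock to the streamed message's blocks:

from llama_index.core.base.llms.types import ChatMessage, ThinkingBlock

resp_gen = llm.stream_chat([ChatMessage(content="Why is the sky blue?")])
final = None
for resp in resp_gen:
    print(resp.delta or "", end="", flush=True)  # visible text as it arrives
    final = resp

if final is not None:
    thoughts = [b for b in final.message.blocks if isinstance(b, ThinkingBlock)]
    if thoughts:
        print("\n--- model thinking ---")
        print(thoughts[0].content)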