[Bug]: DocumentBlock is not working with OpenAI ChatMessage #20120

@MohammedShokr

Bug Description

I have a FunctionAgent that uses the gpt-4o model from OpenAI, and I need to allow the user to attach files and images with their request. When I tried to use a DocumentBlock inside a ChatMessage, I got the following error:

BadRequestError: Error code: 400 - {'error': {'message': "Invalid value: 'input_file'. Supported values are: 'text', 'image_url', 'input_audio', 'refusal', 'audio', and 'file'.", 'type': 'invalid_request_error', 'param': 'messages[0].content[1].type', 'code': 'invalid_value'}}
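
For context, the Chat Completions endpoint does accept PDFs, just under the content-part type 'file' rather than 'input_file'. As a workaround I can call the OpenAI client directly; the payload shape below is a minimal sketch based on the API error and OpenAI's documented PDF-input format, not on anything llama-index produces:

# Workaround sketch: send the PDF as a Chat Completions "file" content
# part instead of going through DocumentBlock.
import base64

from openai import OpenAI

client = OpenAI()

with open("./data/doc2.pdf", "rb") as f:
    pdf_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "summarize the following document"},
                {
                    "type": "file",  # 'input_file' is what gets rejected
                    "file": {
                        "filename": "doc2.pdf",
                        "file_data": f"data:application/pdf;base64,{pdf_b64}",
                    },
                },
            ],
        }
    ],
)
print(resp.choices[0].message.content)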

Version

0.14.5

Steps to Reproduce

Run this simple agent:

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock, DocumentBlock
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI

def add(x: int, y: int) -> int:
    """Useful function to add two numbers."""
    return x + y


def multiply(x: int, y: int) -> int:
    """Useful function to multiply two numbers."""
    return x * y


tools = [
    FunctionTool.from_defaults(add),
    FunctionTool.from_defaults(multiply),
]

agent = FunctionAgent(
    llm=OpenAI(model="gpt-4o"), tools=tools, timeout=120, verbose=True
)

msg = ChatMessage(
    role="user",
    blocks=[
        TextBlock(text="summarize the following document"),
        DocumentBlock(path="./data/doc2.pdf"),
    ],
)

ret = await agent.run(msg)
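
Note: the top-level await works here because I run this in a notebook (see the cell reference in the traceback). In a plain script the same call needs an event loop, e.g.:

# Plain-script equivalent of the top-level await above.
import asyncio

async def main() -> None:
    ret = await agent.run(msg)
    print(ret)

asyncio.run(main())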

Relevant Logs/Tracebacks

---------------------------------------------------------------------------
BadRequestError                           Traceback (most recent call last)
Cell In[2], line 35
     23 agent = FunctionAgent(
     24     llm=OpenAI(model="gpt-4o"), tools=tools, timeout=120, verbose=True
     25 )
     27 msg = ChatMessage(
     28     role="user",
     29     blocks=[
   (...)     32     ],
     33 )
---> 35 ret = await agent.run(msg)

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/workflows/workflow.py:439, in Workflow.run.<locals>._run_workflow(ctx)
    435 if exception_raised:
    436     # cancel the stream
    437     ctx.write_event_to_stream(StopEvent())
--> 439     raise exception_raised
    441 if not we_done:
    442     # cancel the stream
    443     ctx.write_event_to_stream(StopEvent())

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/workflows/context/context.py:822, in Context._step_worker(self, name, step, config, verbose, run_id, worker_id, resource_manager)
    813 self.write_event_to_stream(
    814     StepStateChanged(
    815         name=name,
   (...)    819     )
    820 )
    821 try:
--> 822     new_ev = await instrumented_step(**kwargs)
    823     kwargs.clear()
    824     break  # exit the retrying loop

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/llama_index_instrumentation/dispatcher.py:386, in Dispatcher.span.<locals>.async_wrapper(func, instance, args, kwargs)
    378 self.span_enter(
    379     id_=id_,
    380     bound_args=bound_args,
   (...)    383     tags=tags,
    384 )
    385 try:
--> 386     result = await func(*args, **kwargs)
    387 except BaseException as e:
    388     self.event(SpanDropEvent(span_id=id_, err_str=str(e)))

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/llama_index/core/agent/workflow/base_agent.py:390, in BaseWorkflowAgent.run_agent_step(self, ctx, ev)
    387 user_msg_str = await ctx.store.get("user_msg_str")
    388 tools = await self.get_tools(user_msg_str or "")
--> 390 agent_output = await self.take_step(
    391     ctx,
    392     ev.input,
    393     tools,
    394     memory,
    395 )
    397 ctx.write_event_to_stream(agent_output)
    398 return agent_output

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/llama_index/core/agent/workflow/function_agent.py:120, in FunctionAgent.take_step(self, ctx, llm_input, tools, memory)
    115 ctx.write_event_to_stream(
    116     AgentInput(input=current_llm_input, current_agent_name=self.name)
    117 )
    119 if self.streaming:
--> 120     last_chat_response = await self._get_streaming_response(
    121         ctx, current_llm_input, tools
    122     )
    123 else:
    124     last_chat_response = await self._get_response(current_llm_input, tools)

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/llama_index/core/agent/workflow/function_agent.py:75, in FunctionAgent._get_streaming_response(self, ctx, current_llm_input, tools)
     72 # last_chat_response will be used later, after the loop.
     73 # We initialize it so it's valid even when 'response' is empty
     74 last_chat_response = ChatResponse(message=ChatMessage())
---> 75 async for last_chat_response in response:
     76     tool_calls = self.llm.get_tool_calls_from_response(  # type: ignore
     77         last_chat_response, error_on_no_tool_call=False
     78     )
     79     raw = (
     80         last_chat_response.raw.model_dump()
     81         if isinstance(last_chat_response.raw, BaseModel)
     82         else last_chat_response.raw
     83     )

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/llama_index/core/llms/callbacks.py:89, in llm_chat_callback.<locals>.wrap.<locals>.wrapped_async_llm_chat.<locals>.wrapped_gen()
     87 last_response = None
     88 try:
---> 89     async for x in f_return_val:
     90         dispatcher.event(
     91             LLMChatInProgressEvent(
     92                 messages=messages,
   (...)     95             )
     96         )
     97         yield cast(ChatResponse, x)

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/llama_index/llms/openai/base.py:803, in OpenAI._astream_chat.<locals>.gen()
    801 is_function = False
    802 first_chat_chunk = True
--> 803 async for response in await aclient.chat.completions.create(
    804     messages=message_dicts,
    805     **self._get_model_kwargs(stream=True, **kwargs),
    806 ):
    807     blocks = []
    808     response = cast(ChatCompletionChunk, response)

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py:2585, in AsyncCompletions.create(self, messages, model, audio, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, metadata, modalities, n, parallel_tool_calls, prediction, presence_penalty, prompt_cache_key, reasoning_effort, response_format, safety_identifier, seed, service_tier, stop, store, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, verbosity, web_search_options, extra_headers, extra_query, extra_body, timeout)
   2539 @required_args(["messages", "model"], ["messages", "model", "stream"])
   2540 async def create(
   2541     self,
   (...)   2582     timeout: float | httpx.Timeout | None | NotGiven = not_given,
   2583 ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
   2584     validate_response_format(response_format)
-> 2585     return await self._post(
   2586         "/chat/completions",
   2587         body=await async_maybe_transform(
   2588             {
   2589                 "messages": messages,
   2590                 "model": model,
   2591                 "audio": audio,
   2592                 "frequency_penalty": frequency_penalty,
   2593                 "function_call": function_call,
   2594                 "functions": functions,
   2595                 "logit_bias": logit_bias,
   2596                 "logprobs": logprobs,
   2597                 "max_completion_tokens": max_completion_tokens,
   2598                 "max_tokens": max_tokens,
   2599                 "metadata": metadata,
   2600                 "modalities": modalities,
   2601                 "n": n,
   2602                 "parallel_tool_calls": parallel_tool_calls,
   2603                 "prediction": prediction,
   2604                 "presence_penalty": presence_penalty,
   2605                 "prompt_cache_key": prompt_cache_key,
   2606                 "reasoning_effort": reasoning_effort,
   2607                 "response_format": response_format,
   2608                 "safety_identifier": safety_identifier,
   2609                 "seed": seed,
   2610                 "service_tier": service_tier,
   2611                 "stop": stop,
   2612                 "store": store,
   2613                 "stream": stream,
   2614                 "stream_options": stream_options,
   2615                 "temperature": temperature,
   2616                 "tool_choice": tool_choice,
   2617                 "tools": tools,
   2618                 "top_logprobs": top_logprobs,
   2619                 "top_p": top_p,
   2620                 "user": user,
   2621                 "verbosity": verbosity,
   2622                 "web_search_options": web_search_options,
   2623             },
   2624             completion_create_params.CompletionCreateParamsStreaming
   2625             if stream
   2626             else completion_create_params.CompletionCreateParamsNonStreaming,
   2627         ),
   2628         options=make_request_options(
   2629             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
   2630         ),
   2631         cast_to=ChatCompletion,
   2632         stream=stream or False,
   2633         stream_cls=AsyncStream[ChatCompletionChunk],
   2634     )

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/openai/_base_client.py:1794, in AsyncAPIClient.post(self, path, cast_to, body, files, options, stream, stream_cls)
   1780 async def post(
   1781     self,
   1782     path: str,
   (...)   1789     stream_cls: type[_AsyncStreamT] | None = None,
   1790 ) -> ResponseT | _AsyncStreamT:
   1791     opts = FinalRequestOptions.construct(
   1792         method="post", url=path, json_data=body, files=await async_to_httpx_files(files), **options
   1793     )
-> 1794     return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)

File ~/workspace/ai-service-iadam-agent/.venv/lib/python3.12/site-packages/openai/_base_client.py:1594, in AsyncAPIClient.request(self, cast_to, options, stream, stream_cls)
   1591             await err.response.aread()
   1593         log.debug("Re-raising status error")
-> 1594         raise self._make_status_error_from_response(err.response) from None
   1596     break
   1598 assert response is not None, "could not resolve response (should never happen)"

BadRequestError: Error code: 400 - {'error': {'message': "Invalid value: 'input_file'. Supported values are: 'text', 'image_url', 'input_audio', 'refusal', 'audio', and 'file'.", 'type': 'invalid_request_error', 'param': 'messages[0].content[1].type', 'code': 'invalid_value'}}
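
For what it's worth, 'input_file' is the content-part type used by OpenAI's Responses API, so my guess (an assumption from the error string, not confirmed against the llama-index source) is that DocumentBlock serializes to the Responses format but is then sent through chat.completions. The same attachment goes through fine when sent to the Responses endpoint directly:

# Sketch: the same PDF via the Responses API, where "input_file" is the
# correct content-part type. pdf_b64 is the base64 string built in the
# workaround sketch above.
from openai import OpenAI

client = OpenAI()

resp = client.responses.create(
    model="gpt-4o",
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "summarize the following document"},
                {
                    "type": "input_file",
                    "filename": "doc2.pdf",
                    "file_data": f"data:application/pdf;base64,{pdf_b64}",
                },
            ],
        }
    ],
)
print(resp.output_text)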

Labels: bug (Something isn't working), triage (Issue needs to be triaged/prioritized)
