Skip to content
39 changes: 37 additions & 2 deletions pydantic_ai_slim/pydantic_ai/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,25 @@ def format(self) -> ImageFormat:
"""
return _image_format_lookup[self.media_type]

def _is_text_like_media_type(media_type: str) -> bool:
return (
media_type.startswith('text/')
or media_type == 'application/json'
or media_type.endswith('+json')
or media_type == 'application/xml'
or media_type.endswith('+xml')
or media_type in ('application/x-yaml', 'application/yaml')
)

def _inline_text_file_part(text: str, *, media_type: str, identifier: str):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't return a "part" so the name should be changed

text = '\n'.join(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can return this directly, no need for the text variable

[
f'-----BEGIN FILE id="{identifier}" type="{media_type}"-----',
text,
f'-----END FILE id="{identifier}"-----',
]
)
return text

@dataclass(init=False, repr=False)
class DocumentUrl(FileUrl):
Expand Down Expand Up @@ -457,6 +476,14 @@ def format(self) -> DocumentFormat:
return _document_format_lookup[media_type]
except KeyError as e:
raise ValueError(f'Unknown document media type: {media_type}') from e

@staticmethod
def is_text_like_media_type(media_type: str) -> bool:
    """Return True if *media_type* denotes textual content (text/*, JSON, XML, YAML).

    Note: previously the parameter was confusingly named `self` despite this
    being a `@staticmethod`; it is a plain media-type string.
    """
    return _is_text_like_media_type(media_type)

@staticmethod
def inline_text_file_part(text: str, *, media_type: str, identifier: str) -> str:
    """Wrap *text* in BEGIN/END FILE delimiters; delegates to the module-level helper."""
    return _inline_text_file_part(text, media_type=media_type, identifier=identifier)


@dataclass(init=False, repr=False)
Expand Down Expand Up @@ -513,9 +540,17 @@ def narrow_type(bc: BinaryContent) -> BinaryContent | BinaryImage:
identifier=bc.identifier,
vendor_metadata=bc.vendor_metadata,
)
else:
return bc
else:
return bc # pragma: no cover
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This pragma: no cover shouldn't be necessary


@staticmethod
def is_text_like_media_type(media_type: str) -> bool:
    """Return True if *media_type* denotes textual content (text/*, JSON, XML, YAML).

    Note: previously the parameter was confusingly named `self` despite this
    being a `@staticmethod`; it is a plain media-type string.
    """
    return _is_text_like_media_type(media_type)

@staticmethod
def inline_text_file_part(text: str, *, media_type: str, identifier: str) -> str:
    """Wrap *text* in BEGIN/END FILE delimiters; delegates to the module-level helper."""
    return _inline_text_file_part(text, media_type=media_type, identifier=identifier)

@classmethod
def from_data_uri(cls, data_uri: str) -> BinaryContent:
"""Create a `BinaryContent` from a data URI."""
Expand Down
43 changes: 37 additions & 6 deletions pydantic_ai_slim/pydantic_ai/models/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
BinaryContent,
BuiltinToolCallPart,
BuiltinToolReturnPart,
DocumentUrl,
FilePart,
FileUrl,
FinishReason,
Expand Down Expand Up @@ -565,17 +566,46 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
if isinstance(item, str):
content.append({'text': item})
elif isinstance(item, BinaryContent):
inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
part_dict: PartDict = {'inline_data': inline_data_dict}
if item.vendor_metadata:
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)
if BinaryContent.is_text_like_media_type(item.media_type):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also update the OpenAI implementation to use the new methods

content.append(
BinaryContent.inline_text_file_part(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error you asked about in the other thread is because content is a list of PartDicts, but inline_text_file_part returns a str. Like on line 567, you need to wrap it in {'text': ...}.

Note that if you were running a type checker like pyright in your IDE or manually, it would have told you this.

item.data.decode('utf-8'),
media_type=item.media_type,
identifier=item.identifier,
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of passing 3 fields of item to the static method, could it be an instance method on item?

)
else:
inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
part_dict: PartDict = {'inline_data': inline_data_dict}
if item.vendor_metadata:
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)

elif isinstance(item, DocumentUrl):
if DocumentUrl.is_text_like_media_type(item.media_type):
downloaded_text = await download_item(item, data_format='text')
content.append(
DocumentUrl.inline_text_file_part(
downloaded_text['data'],
media_type=item.media_type,
identifier=item.identifier,
)
)
else:
downloaded_item = await download_item(item, data_format='bytes')
inline_data_dict: BlobDict = {
'data': downloaded_item['data'],
'mime_type': downloaded_item['data_type'],
}
content.append({'inline_data': inline_data_dict})

elif isinstance(item, VideoUrl) and item.is_youtube:
file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
part_dict: PartDict = {'file_data': file_data_dict}
if item.vendor_metadata: # pragma: no branch
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)

elif isinstance(item, FileUrl):
if item.force_download or (
# google-gla does not support passing file urls directly, except for youtube videos
Expand All @@ -594,7 +624,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
content.append({'file_data': file_data_dict}) # pragma: lax no cover
else:
assert_never(item)
return content

return content

def _map_response_schema(self, o: OutputObjectDefinition) -> dict[str, Any]:
response_schema = o.json_schema.copy()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
interactions:
- request:
body: ''
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
host:
- kamalscraping-collab.github.io
method: GET
uri: https://kamalscraping-collab.github.io/sample-data/sample_transcript.json
response:
body:
string: |-
{
"items": [
{
"id": "GR_ad8d2a461fc5",
"type": "message",
"role": "assistant",
"content": [
"Hello, how can I help you today?"
],
"interrupted": false
},
{
"id": "item_13ecd51e0dcc",
"type": "function_call",
"call_id": "function-call-18124021183837676163",
"arguments": "{\"location\": \"Kampala, Uganda\"}",
"name": "lookup_weather"
},
{
"id": "GI_14a70e7c2d20",
"type": "message",
"role": "user",
"content": [
"Haide, can you please tell me the weather in compiler Uganda"
],
"interrupted": false
},
{
"id": "item_000f739d4414",
"type": "function_call_output",
"name": "lookup_weather",
"call_id": "function-call-18124021183837676163",
"output": "{'weather': 'sunny', 'temperature_f': 70}",
"is_error": false
},
{
"id": "GR_95c91db6b975",
"type": "message",
"role": "assistant",
"content": [
"The weather in Kampala, Uganda is sunny with a temperature of 70 degrees Fahrenheit."
],
"interrupted": false
},
{
"id": "GI_c8cc9177073f",
"type": "message",
"role": "user",
"content": [
"what can you please tell me what are the best things to do in compiler you're"
],
"interrupted": false
},
{
"id": "GR_792c5f6fbc89",
"type": "message",
"role": "assistant",
"content": [
"While I can tell you the weather, I'm not able to provide information on the best things to do in a specific location. Is there anything else I can help you with?"
],
"interrupted": false
}
]
}
headers:
cache-control:
- max-age=604800
- public
connection:
- keep-alive
content-length:
- '2574'
content-type:
- text/plain; charset=UTF-8
etag:
- W/"61efea10-a0e"
expires:
- Fri, 26 Dec 2025 16:42:28 GMT
last-modified:
- Tue, 25 Jan 2022 12:16:16 GMT
strict-transport-security:
- max-age=15552000; includeSubDomains
transfer-encoding:
- chunked
vary:
- Accept-Encoding
status:
code: 200
message: OK
- request:
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '3701'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
parsed_body:
contents:
- parts:
- text: What is the main content on this document?
- inlineData:
data: VFhUIHRlc3QgZmlsZQpQdXJwb3NlOiBQcm92aWRlIGV4YW1wbGUgb2YgdGhpcyBmaWxlIHR5cGUKRG9jdW1lbnQgZmlsZSB0eXBlOiBUWFQKVmVyc2lvbjogMS4wClJlbWFyazoKCkV4YW1wbGUgY29udGVudDoKVGhlIG5hbWVzICJKb2huIERvZSIgZm9yIG1hbGVzLCAiSmFuZSBEb2UiIG9yICJKYW5lIFJvZSIgZm9yIGZlbWFsZXMsIG9yICJKb25uaWUgRG9lIiBhbmQgIkphbmllIERvZSIgZm9yIGNoaWxkcmVuLCBvciBqdXN0ICJEb2UiIG5vbi1nZW5kZXItc3BlY2lmaWNhbGx5IGFyZSB1c2VkIGFzIHBsYWNlaG9sZGVyIG5hbWVzIGZvciBhIHBhcnR5IHdob3NlIHRydWUgaWRlbnRpdHkgaXMgdW5rbm93biBvciBtdXN0IGJlIHdpdGhoZWxkIGluIGEgbGVnYWwgYWN0aW9uLCBjYXNlLCBvciBkaXNjdXNzaW9uLiBUaGUgbmFtZXMgYXJlIGFsc28gdXNlZCB0byByZWZlciB0byBhY29ycHNlIG9yIGhvc3BpdGFsIHBhdGllbnQgd2hvc2UgaWRlbnRpdHkgaXMgdW5rbm93bi4gVGhpcyBwcmFjdGljZSBpcyB3aWRlbHkgdXNlZCBpbiB0aGUgVW5pdGVkIFN0YXRlcyBhbmQgQ2FuYWRhLCBidXQgaXMgcmFyZWx5IHVzZWQgaW4gb3RoZXIgRW5nbGlzaC1zcGVha2luZyBjb3VudHJpZXMgaW5jbHVkaW5nIHRoZSBVbml0ZWQgS2luZ2RvbSBpdHNlbGYsIGZyb20gd2hlcmUgdGhlIHVzZSBvZiAiSm9obiBEb2UiIGluIGEgbGVnYWwgY29udGV4dCBvcmlnaW5hdGVzLiBUaGUgbmFtZXMgSm9lIEJsb2dncyBvciBKb2huIFNtaXRoIGFyZSB1c2VkIGluIHRoZSBVSyBpbnN0ZWFkLCBhcyB3ZWxsIGFzIGluIEF1c3RyYWxpYSBhbmQgTmV3IFplYWxhbmQuCgpKb2huIERvZSBpcyBzb21ldGltZXMgdXNlZCB0byByZWZlciB0byBhIHR5cGljYWwgbWFsZSBpbiBvdGhlciBjb250ZXh0cyBhcyB3ZWxsLCBpbiBhIHNpbWlsYXIgbWFubmVyIHRvIEpvaG4gUS4gUHVibGljLCBrbm93biBpbiBHcmVhdCBCcml0YWluIGFzIEpvZSBQdWJsaWMsIEpvaG4gU21pdGggb3IgSm9lIEJsb2dncy4gRm9yIGV4YW1wbGUsIHRoZSBmaXJzdCBuYW1lIGxpc3RlZCBvbiBhIGZvcm0gaXMgb2Z0ZW4gSm9obiBEb2UsIGFsb25nIHdpdGggYSBmaWN0aW9uYWwgYWRkcmVzcyBvciBvdGhlciBmaWN0aW9uYWwgaW5mb3JtYXRpb24gdG8gcHJvdmlkZSBhbiBleGFtcGxlIG9mIGhvdyB0byBmaWxsIGluIHRoZSBmb3JtLiBUaGUgbmFtZSBpcyBhbHNvIHVzZWQgZnJlcXVlbnRseSBpbiBwb3B1bGFyIGN1bHR1cmUsIGZvciBleGFtcGxlIGluIHRoZSBGcmFuayBDYXByYSBmaWxtIE1lZXQgSm9obiBEb2UuIEpvaG4gRG9lIHdhcyBhbHNvIHRoZSBuYW1lIG9mIGEgMjAwMiBBbWVyaWNhbiB0ZWxldmlzaW9uIHNlcmllcy4KClNpbWlsYXJseSwgYSBjaGlsZCBvciBiYWJ5IHdob3NlIGlkZW50aXR5IGlzIHVua25vd24gbWF5IGJlIHJlZmVycmVkIHRvIGFzIEJhYnkgRG9lLiBBIG5vdG9yaW91cyBtdXJkZXIgY2FzZSBpbiBLYW5zYXMgQ2l0eSwgTWlzc291cmksIHJlZmVycmVkIH
RvIHRoZSBiYWJ5IHZpY3RpbSBhcyBQcmVjaW91cyBEb2UuIE90aGVyIHVuaWRlbnRpZmllZCBmZW1hbGUgbXVyZGVyIHZpY3RpbXMgYXJlIENhbGkgRG9lIGFuZCBQcmluY2VzcyBEb2UuIEFkZGl0aW9uYWwgcGVyc29ucyBtYXkgYmUgY2FsbGVkIEphbWVzIERvZSwgSnVkeSBEb2UsIGV0Yy4gSG93ZXZlciwgdG8gYXZvaWQgcG9zc2libGUgY29uZnVzaW9uLCBpZiB0d28gYW5vbnltb3VzIG9yIHVua25vd24gcGFydGllcyBhcmUgY2l0ZWQgaW4gYSBzcGVjaWZpYyBjYXNlIG9yIGFjdGlvbiwgdGhlIHN1cm5hbWVzIERvZSBhbmQgUm9lIG1heSBiZSB1c2VkIHNpbXVsdGFuZW91c2x5OyBmb3IgZXhhbXBsZSwgIkpvaG4gRG9lIHYuIEphbmUgUm9lIi4gSWYgc2V2ZXJhbCBhbm9ueW1vdXMgcGFydGllcyBhcmUgcmVmZXJlbmNlZCwgdGhleSBtYXkgc2ltcGx5IGJlIGxhYmVsbGVkIEpvaG4gRG9lICMxLCBKb2huIERvZSAjMiwgZXRjLiAodGhlIFUuUy4gT3BlcmF0aW9uIERlbGVnbyBjaXRlZCAyMSAobnVtYmVyZWQpICJKb2huIERvZSJzKSBvciBsYWJlbGxlZCB3aXRoIG90aGVyIHZhcmlhbnRzIG9mIERvZSAvIFJvZSAvIFBvZSAvIGV0Yy4gT3RoZXIgZWFybHkgYWx0ZXJuYXRpdmVzIHN1Y2ggYXMgSm9obiBTdGlsZXMgYW5kIFJpY2hhcmQgTWlsZXMgYXJlIG5vdyByYXJlbHkgdXNlZCwgYW5kIE1hcnkgTWFqb3IgaGFzIGJlZW4gdXNlZCBpbiBzb21lIEFtZXJpY2FuIGZlZGVyYWwgY2FzZXMuCgoKCkZpbGUgY3JlYXRlZCBieSBodHRwczovL3d3dy5vbmxpbmUtY29udmVydC5jb20KTW9yZSBleGFtcGxlIGZpbGVzOiBodHRwczovL3d3dy5vbmxpbmUtY29udmVydC5jb20vZmlsZS10eXBlClRleHQgb2YgRXhhbXBsZSBjb250ZW50OiBXaWtpcGVkaWEgKGh0dHBzOi8vZW4ud2lraXBlZGlhLm9yZy93aWtpL0pvaG5fRG9lKQpMaWNlbnNlOiBBdHRyaWJ1dGlvbi1TaGFyZUFsaWtlIDQuMCAoaHR0cHM6Ly9jcmVhdGl2ZWNvbW1vbnMub3JnL2xpY2Vuc2VzL2J5LXNhLzQuMC8pCgpGZWVsIGZyZWUgdG8gdXNlIGFuZCBzaGFyZSB0aGUgZmlsZSBhY2NvcmRpbmcgdG8gdGhlIGxpY2Vuc2UgYWJvdmUu
mimeType: application/json
role: user
generationConfig: {}
systemInstruction:
parts:
- text: You are a helpful chatbot.
role: user
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent
response:
headers:
alt-svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
content-length:
- '985'
content-type:
- application/json; charset=UTF-8
server-timing:
- gfet4t7; dur=888
transfer-encoding:
- chunked
vary:
- Origin
- X-Origin
- Referer
parsed_body:
candidates:
- avgLogprobs: -0.5004191543116714
content:
parts:
- text: |
Based on the JSON data provided, the document contains the log of a conversation between a user and an AI assistant.
role: model
finishReason: STOP
modelVersion: gemini-2.5-pro
responseId: 9YfNaLGGDuOmqtsPoLXu4AQ
usageMetadata:
candidatesTokenCount: 66
candidatesTokensDetails:
- modality: TEXT
tokenCount: 66
promptTokenCount: 614
promptTokensDetails:
- modality: TEXT
tokenCount: 614
totalTokenCount: 680
status:
code: 200
message: OK
version: 1
12 changes: 12 additions & 0 deletions tests/models/test_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,18 @@ async def test_google_model_text_document_url_input(allow_model_requests: None,
)


async def test_google_model_json_document_url_input(allow_model_requests: None, google_provider: GoogleProvider):
    """JSON documents fetched via DocumentUrl should be inlined as text for Gemini.

    Uses the VCR cassette recorded for gemini-2.5-pro; the JSON file is a
    sample conversation transcript served from a public URL.
    """
    m = GoogleModel('gemini-2.5-pro', provider=google_provider)
    agent = Agent(m, system_prompt='You are a helpful chatbot.')

    # NOTE(review): consider hosting this fixture somewhere not tied to a personal repo.
    json_document_url = DocumentUrl(url='https://kamalscraping-collab.github.io/sample-data/sample_transcript.json')

    result = await agent.run(['What is the main content of this document?', json_document_url])
    assert result.output == snapshot(
        'Based on the JSON data provided, the document contains the log of a conversation between a user and an AI assistant.\n'
    )


async def test_google_model_text_as_binary_content_input(allow_model_requests: None, google_provider: GoogleProvider):
m = GoogleModel('gemini-2.0-flash', provider=google_provider)
agent = Agent(m, system_prompt='You are a helpful chatbot.')
Expand Down