Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,31 @@ agent = FunctionAgent(
await agent.run("What happened in the latest Burning Man festival?")
```

`search`: Search for relevant dynamic data based on a query. Returns a list of urls and their relevant content.
## Available Functions

`search`: Search for relevant dynamic data based on a query. Returns a list of Document objects with urls and their relevant content.

`extract`: Extract raw content from specific URLs using Tavily Extract API. Returns a list of Document objects containing the extracted content and metadata.

### Extract Function Example

```python
from llama_index.tools.tavily_research import TavilyToolSpec

tavily_tool = TavilyToolSpec(api_key="your-key")

# Extract content from specific URLs
documents = tavily_tool.extract(
urls=["https://example.com/article1", "https://example.com/article2"],
include_images=True,
include_favicon=True,
extract_depth="advanced", # "basic" or "advanced"
format="markdown", # "markdown" or "text"
)

for doc in documents:
    print(f"URL: {doc.extra_info['url']}")
    print(f"Content: {doc.text[:200]}...")
```

This loader is designed to be used as a way to load data as a Tool in an Agent.
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,45 @@
"tavily_tool.search(\"What happened in the latest Burning Man festival?\", max_results=3)"
]
},
{
"cell_type": "markdown",
"id": "e61abb6d",
"metadata": {},
"source": [
"## Testing the Tavily extract tool\n",
"\n",
"The extract function allows you to extract raw content from specific URLs. This is useful when you have specific URLs you want to extract content from, rather than searching for content."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "360386ef",
"metadata": {},
"outputs": [],
"source": [
"# Extract content from specific URLs\n",
"urls_to_extract = [\n",
" \"https://en.wikipedia.org/wiki/Burning_Man\",\n",
" \"https://burningman.org/about/\",\n",
"]\n",
"\n",
"extracted_docs = tavily_tool.extract(\n",
" urls=urls_to_extract,\n",
" include_images=False,\n",
" include_favicon=True,\n",
" extract_depth=\"basic\",\n",
" format=\"markdown\",\n",
")\n",
"\n",
"print(f\"Extracted {len(extracted_docs)} documents:\")\n",
"for i, doc in enumerate(extracted_docs):\n",
" print(f\"\\nDocument {i+1}:\")\n",
" print(f\"URL: {doc.extra_info.get('url', 'N/A')}\")\n",
" print(f\"Content preview: {doc.text[:300]}...\")\n",
" print(f\"Has favicon: {doc.extra_info.get('favicon') is not None}\")"
]
},
{
"cell_type": "markdown",
"id": "1210906d-87a7-466a-9712-1d17dba2c2ec",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class TavilyToolSpec(BaseToolSpec):

spec_functions = [
"search",
"extract",
]

def __init__(self, api_key: str) -> None:
Expand Down Expand Up @@ -40,3 +41,51 @@ def search(self, query: str, max_results: Optional[int] = 6) -> List[Document]:
Document(text=result["content"], extra_info={"url": result["url"]})
for result in response["results"]
]

def extract(
    self,
    urls: List[str],
    include_images: bool = False,
    include_favicon: bool = False,
    extract_depth: str = "basic",
    format: str = "markdown",
) -> List[Document]:
    """
    Extract raw content from one or more URLs using the Tavily Extract API.

    Args:
        urls: The URL(s) to extract content from.
        include_images: Whether to include images in the response.
        include_favicon: Whether to include the favicon in the response.
        extract_depth: 'basic' or 'advanced' (default: 'basic').
        format: 'markdown' or 'text' (default: 'markdown').

    Returns:
        A list of Document objects, one per entry in the response's
        "results" list. Each Document's text is the entry's "raw_content"
        (an empty string when that field is absent or null) and its
        extra_info holds the entry's "url", "favicon" and "images" values
        (None when absent). An empty list is returned if the response
        contains no results.

    """
    response = self.client.extract(
        urls,
        include_images=include_images,
        include_favicon=include_favicon,
        extract_depth=extract_depth,
        format=format,
    )

    # `or` (rather than a .get() default) also covers keys that are present
    # but null, so neither a null "results" nor a null "raw_content" is
    # iterated over or handed to Document as text.
    results = response.get("results") or []

    return [
        Document(
            text=result.get("raw_content") or "",
            extra_info={
                "url": result.get("url"),
                "favicon": result.get("favicon"),
                "images": result.get("images"),
            },
        )
        for result in results
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dev = [

[project]
name = "llama-index-tools-tavily-research"
version = "0.4.1"
version = "0.4.2"
description = "llama-index tools tavily_research integration"
authors = [{name = "Your Name", email = "[email protected]"}]
requires-python = ">=3.9,<4.0"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,214 @@
from unittest.mock import Mock, patch
from llama_index.core.tools.tool_spec.base import BaseToolSpec
from llama_index.core.schema import Document
from llama_index.tools.tavily_research import TavilyToolSpec


def test_class():
    """TavilyToolSpec must list BaseToolSpec among the class names in its MRO."""
    mro_names = {klass.__name__ for klass in TavilyToolSpec.__mro__}
    assert BaseToolSpec.__name__ in mro_names


def test_spec_functions():
    """Both the search and extract tools must be registered in spec_functions."""
    for tool_name in ("search", "extract"):
        assert tool_name in TavilyToolSpec.spec_functions


@patch("tavily.TavilyClient")
def test_init(mock_tavily_client):
    """Constructing the spec builds exactly one TavilyClient from the API key."""
    tool = TavilyToolSpec(api_key="test_api_key")

    # The client class is called once with the key, and the instance is kept.
    mock_tavily_client.assert_called_once_with(api_key="test_api_key")
    assert tool.client == mock_tavily_client.return_value


@patch("tavily.TavilyClient")
def test_search(mock_tavily_client):
    """search() wraps every Tavily result in a Document carrying its URL."""
    expected = [
        ("Test content 1", "https://example1.com"),
        ("Test content 2", "https://example2.com"),
    ]

    # Stub the client so search() returns one result per expected pair.
    client = Mock()
    client.search.return_value = {
        "results": [{"content": text, "url": url} for text, url in expected]
    }
    mock_tavily_client.return_value = client

    documents = TavilyToolSpec(api_key="test_key").search(
        "test query", max_results=5
    )

    # The query and limit are forwarded, always with advanced search depth.
    client.search.assert_called_once_with(
        "test query", max_results=5, search_depth="advanced"
    )

    assert len(documents) == 2
    for document, (text, url) in zip(documents, expected):
        assert isinstance(document, Document)
        assert document.text == text
        assert document.extra_info["url"] == url


@patch("tavily.TavilyClient")
def test_search_with_default_max_results(mock_tavily_client):
    """Omitting max_results forwards the default of 6 to the client."""
    client = Mock()
    client.search.return_value = {"results": []}
    mock_tavily_client.return_value = client

    TavilyToolSpec(api_key="test_key").search("test query")

    client.search.assert_called_once_with(
        "test query", max_results=6, search_depth="advanced"
    )


@patch("tavily.TavilyClient")
def test_extract(mock_tavily_client):
    """extract() forwards its options and maps each result to a Document."""
    first_page = {
        "raw_content": "Extracted content 1",
        "url": "https://example1.com",
        "favicon": "https://example1.com/favicon.ico",
        "images": ["https://example1.com/image1.jpg"],
    }
    second_page = {
        "raw_content": "Extracted content 2",
        "url": "https://example2.com",
        "favicon": "https://example2.com/favicon.ico",
        "images": ["https://example2.com/image2.jpg"],
    }

    # Stub the client so extract() returns the two canned pages.
    client = Mock()
    client.extract.return_value = {"results": [first_page, second_page]}
    mock_tavily_client.return_value = client

    urls = ["https://example1.com", "https://example2.com"]
    options = {
        "include_images": True,
        "include_favicon": True,
        "extract_depth": "advanced",
        "format": "text",
    }
    documents = TavilyToolSpec(api_key="test_key").extract(urls=urls, **options)

    # URLs are passed positionally to the client; options go as keywords.
    client.extract.assert_called_once_with(urls, **options)

    assert len(documents) == 2
    assert all(isinstance(document, Document) for document in documents)

    first_doc, second_doc = documents

    assert first_doc.text == "Extracted content 1"
    assert first_doc.extra_info["url"] == "https://example1.com"
    assert first_doc.extra_info["favicon"] == "https://example1.com/favicon.ico"
    assert first_doc.extra_info["images"] == ["https://example1.com/image1.jpg"]

    assert second_doc.text == "Extracted content 2"
    assert second_doc.extra_info["url"] == "https://example2.com"


@patch("tavily.TavilyClient")
def test_extract_with_defaults(mock_tavily_client):
    """Calling extract() with only URLs forwards the documented defaults."""
    client = Mock()
    client.extract.return_value = {"results": []}
    mock_tavily_client.return_value = client

    urls = ["https://example.com"]
    TavilyToolSpec(api_key="test_key").extract(urls)

    expected_defaults = {
        "include_images": False,
        "include_favicon": False,
        "extract_depth": "basic",
        "format": "markdown",
    }
    client.extract.assert_called_once_with(urls, **expected_defaults)


@patch("tavily.TavilyClient")
def test_extract_empty_results(mock_tavily_client):
    """An empty 'results' list from the client yields an empty list."""
    client = Mock()
    client.extract.return_value = {"results": []}
    mock_tavily_client.return_value = client

    documents = TavilyToolSpec(api_key="test_key").extract(
        urls=["https://example.com"]
    )

    assert documents == []


@patch("tavily.TavilyClient")
def test_extract_missing_fields(mock_tavily_client):
    """A result lacking optional fields still yields a Document with fallbacks."""
    # Only the URL is present: raw_content, favicon and images are omitted.
    sparse_result = {"url": "https://example.com"}

    client = Mock()
    client.extract.return_value = {"results": [sparse_result]}
    mock_tavily_client.return_value = client

    documents = TavilyToolSpec(api_key="test_key").extract(
        urls=["https://example.com"]
    )

    assert len(documents) == 1
    (document,) = documents
    metadata = document.extra_info

    assert document.text == ""  # Missing raw_content becomes an empty string
    assert metadata["url"] == "https://example.com"
    assert metadata["favicon"] is None
    assert metadata["images"] is None


@patch("tavily.TavilyClient")
def test_extract_no_results_key(mock_tavily_client):
    """A response that lacks the 'results' key yields an empty list."""
    client = Mock()
    client.extract.return_value = {}  # Response without a 'results' key
    mock_tavily_client.return_value = client

    documents = TavilyToolSpec(api_key="test_key").extract(
        urls=["https://example.com"]
    )

    assert documents == []
Loading