Skip to content

Commit 255b814

Browse files
authored
ci(e2e): Fix navigation e2e test - make it more flexible (#237)
1 parent 52164c5 commit 255b814

File tree

4 files changed

+78
-20
lines changed

4 files changed

+78
-20
lines changed

tests/conftest.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import pytest
22
import os
3+
import asyncio
4+
import gc
35
from dotenv import load_dotenv
46
from mcp.client.sse import sse_client
57
from mcp.client.stdio import stdio_client
@@ -20,6 +22,16 @@ def anyio_backend():
2022
return "asyncio"
2123

2224

25+
@pytest.fixture(autouse=True)
async def cleanup_sessions():
    """Clean up any lingering HTTP sessions after each test.

    Applied automatically to every test (``autouse=True``). Everything after
    the ``yield`` runs during fixture teardown, i.e. once the test finishes.
    """
    yield
    # Force garbage collection to clean up any unclosed sessions
    # (finalizers on abandoned client sessions fire here).
    gc.collect()
    # Give a brief moment for cleanup: yield control to the event loop so
    # close callbacks scheduled by those finalizers can actually run.
    await asyncio.sleep(0.01)
33+
34+
2335
@pytest.fixture
def mcp_transport():
    """Resolve the MCP transport from the MCP_TRANSPORT env var, defaulting when unset."""
    configured = os.environ.get("MCP_TRANSPORT")
    return DEFAULT_MCP_TRANSPORT if configured is None else configured

tests/loki_test.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from conftest import models
1313
from utils import (
1414
get_converted_tools,
15-
llm_tool_call_sequence,
15+
flexible_tool_call,
1616
)
1717

1818
pytestmark = pytest.mark.anyio
@@ -30,7 +30,7 @@ async def test_loki_logs_tool(model: str, mcp_client: ClientSession):
3030
]
3131

3232
# 1. List datasources
33-
messages = await llm_tool_call_sequence(
33+
messages = await flexible_tool_call(
3434
model, messages, tools, mcp_client, "list_datasources"
3535
)
3636
datasources_response = messages[-1].content
@@ -39,8 +39,9 @@ async def test_loki_logs_tool(model: str, mcp_client: ClientSession):
3939
print(f"\nFound Loki datasource: {loki_ds['name']} (uid: {loki_ds['uid']})")
4040

4141
# 2. Query logs
42-
messages = await llm_tool_call_sequence(
43-
model, messages, tools, mcp_client, "query_loki_logs", {"datasourceUid": loki_ds["uid"], "logql": "{container=\"mcp-grafana-grafana-1\"}"}
42+
messages = await flexible_tool_call(
43+
model, messages, tools, mcp_client, "query_loki_logs",
44+
required_params={"datasourceUid": loki_ds["uid"]}
4445
)
4546

4647
# 3. Final LLM response
@@ -66,7 +67,7 @@ async def test_loki_container_labels(model: str, mcp_client: ClientSession):
6667
]
6768

6869
# 1. List datasources
69-
messages = await llm_tool_call_sequence(
70+
messages = await flexible_tool_call(
7071
model, messages, tools, mcp_client, "list_datasources"
7172
)
7273
datasources_response = messages[-1].content
@@ -75,9 +76,9 @@ async def test_loki_container_labels(model: str, mcp_client: ClientSession):
7576
print(f"\nFound Loki datasource: {loki_ds['name']} (uid: {loki_ds['uid']})")
7677

7778
# 2. List label values for 'container'
78-
messages = await llm_tool_call_sequence(
79+
messages = await flexible_tool_call(
7980
model, messages, tools, mcp_client, "list_loki_label_values",
80-
{"datasourceUid": loki_ds["uid"], "labelName": "container"}
81+
required_params={"datasourceUid": loki_ds["uid"], "labelName": "container"}
8182
)
8283

8384
# 3. Final LLM response

tests/navigation_test.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import pytest
23
from langevals import expect
34
from langevals_langevals.llm_boolean import (
@@ -6,11 +7,13 @@
67
)
78
from litellm import Message, acompletion
89
from mcp import ClientSession
10+
from mcp.types import TextContent
911

1012
from conftest import models
1113
from utils import (
1214
get_converted_tools,
1315
llm_tool_call_sequence,
16+
flexible_tool_call,
1417
)
1518

1619
pytestmark = pytest.mark.anyio
@@ -149,31 +152,27 @@ async def test_generate_deeplink_with_time_range(model: str, mcp_client: ClientS
149152

150153
@pytest.mark.parametrize("model", models)
151154
@pytest.mark.flaky(max_runs=3)
152-
async def test_generate_deeplink_with_custom_params(model: str, mcp_client: ClientSession):
155+
async def test_generate_deeplink_with_query_params(model: str, mcp_client: ClientSession):
153156
tools = await get_converted_tools(mcp_client)
154-
prompt = "Generate a dashboard deeplink for 'test-uid' with custom variables"
157+
prompt = "Use the generate_deeplink tool to create a dashboard link for UID 'test-uid' with var-datasource=prometheus and refresh=30s as query parameters"
155158

156159
messages = [
157160
Message(role="system", content="You are a helpful assistant."),
158161
Message(role="user", content=prompt),
159162
]
160163

161-
messages = await llm_tool_call_sequence(
164+
# Use flexible tool call with required parameters
165+
messages = await flexible_tool_call(
162166
model, messages, tools, mcp_client, "generate_deeplink",
163-
{
164-
"resourceType": "dashboard",
165-
"dashboardUid": "test-uid",
166-
"queryParams": {
167-
"var-datasource": "prometheus",
168-
"refresh": "30s"
169-
}
170-
}
167+
required_params={"resourceType": "dashboard", "dashboardUid": "test-uid"}
171168
)
172169

173170
response = await acompletion(model=model, messages=messages, tools=tools)
174171
content = response.choices[0].message.content
175172

176-
assert "var-datasource=prometheus" in content, f"Expected custom parameters, got: {content}"
173+
# Verify both specific query parameters are in the final URL
174+
assert "var-datasource=prometheus" in content, f"Expected var-datasource=prometheus in URL, got: {content}"
175+
assert "refresh=30s" in content, f"Expected refresh=30s in URL, got: {content}"
177176

178177
custom_params_checker = CustomLLMBooleanEvaluator(
179178
settings=CustomLLMBooleanSettings(

tests/utils.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,50 @@ async def llm_tool_call_sequence(
6868

6969
async def get_converted_tools(mcp_client):
    """List the MCP server's tools and convert each one for LLM consumption."""
    listing = await mcp_client.list_tools()
    return list(map(convert_tool, listing.tools))
72+
73+
74+
async def flexible_tool_call(model, messages, tools, mcp_client, expected_tool_name, required_params=None):
    """
    Make a flexible tool call that only checks essential parameters.
    Returns updated messages list.

    The LLM is asked to respond given *messages* and *tools*; the FIRST tool
    call in its response must target *expected_tool_name*, and only that call
    is checked against *required_params*.  Every tool call in the response is
    then executed against the MCP server and answered with a ``role="tool"``
    message, so the conversation stays valid for follow-up completions
    (each tool_call_id must have a matching tool response).

    Args:
        model: The LLM model to use
        messages: Current conversation messages (mutated in place)
        tools: Available tools
        mcp_client: MCP client session
        expected_tool_name: Name of the tool we expect to be called
        required_params: Dict of essential parameters to check (optional);
            a value of None checks presence only, not the value

    Returns:
        Updated messages list including tool call(s) and result(s)
    """
    response = await acompletion(model=model, messages=messages, tools=tools)
    assistant_message = response.choices[0].message

    # Check that a tool call was made
    assert assistant_message.tool_calls is not None, f"Expected tool call for {expected_tool_name}"
    assert len(assistant_message.tool_calls) >= 1, f"Expected at least one tool call for {expected_tool_name}"

    first_call = assistant_message.tool_calls[0]
    assert first_call.function.name == expected_tool_name, f"Expected {expected_tool_name} tool, got {first_call.function.name}"

    # Check required parameters (on the expected first call) if specified
    if required_params:
        first_arguments = json.loads(first_call.function.arguments)
        for key, expected_value in required_params.items():
            assert key in first_arguments, f"Expected parameter '{key}' in tool arguments"
            if expected_value is not None:
                assert first_arguments[key] == expected_value, f"Expected {key}='{expected_value}', got {key}='{first_arguments.get(key)}'"

    # Record the assistant turn, then answer EVERY tool call it contains.
    # Responding only to the first call would leave dangling tool_call_ids
    # and make the next acompletion() request invalid when the model emits
    # more than one call (the assert above allows >= 1).
    messages.append(assistant_message)
    for tool_call in assistant_message.tool_calls:
        arguments = json.loads(tool_call.function.arguments)
        # Call the tool to verify it works
        result = await mcp_client.call_tool(tool_call.function.name, arguments)
        assert len(result.content) == 1, f"Expected exactly one content item from {tool_call.function.name}"
        assert isinstance(result.content[0], TextContent)
        messages.append(Message(role="tool", tool_call_id=tool_call.id, content=result.content[0].text))

    return messages

0 commit comments

Comments
 (0)