@@ -1,3 +1,4 @@
+import json
 import pytest
 from langevals import expect
 from langevals_langevals.llm_boolean import (
@@ -42,3 +43,74 @@ async def test_dashboard_panel_queries_tool(model: str, mcp_client: ClientSession):
     )
     print("content", content)
     expect(input=prompt, output=content).to_pass(panel_queries_checker)
+
+
+@pytest.mark.parametrize("model", models)
+@pytest.mark.flaky(max_runs=3)
+async def test_dashboard_update_with_patch_operations(model: str, mcp_client: ClientSession):
+    """Test that LLMs naturally use patch operations for dashboard updates"""
+    tools = await get_converted_tools(mcp_client)
+
+    # First, create a non-provisioned test dashboard by copying the demo dashboard
+    # 1. Get the demo dashboard JSON
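+    # NOTE: "fe9gm6guyzi0wd" is assumed to be the UID of the pre-provisioned demo dashboard in the test environment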
+    demo_result = await mcp_client.call_tool("get_dashboard_by_uid", {"uid": "fe9gm6guyzi0wd"})
+    demo_data = json.loads(demo_result.content[0].text)
+    dashboard_json = demo_data["dashboard"]
+
+    # 2. Remove uid and id to create a new dashboard
+    if "uid" in dashboard_json:
+        del dashboard_json["uid"]
+    if "id" in dashboard_json:
+        del dashboard_json["id"]
+
+    # 3. Set a new title
+    title = "Test Dashboard"
+    dashboard_json["title"] = title
+    dashboard_json["tags"] = ["python-integration-test"]
+
+    # 4. Create the dashboard in Grafana
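+    # An empty folderUid is assumed to place the new dashboard in the default (General) folder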
+    create_result = await mcp_client.call_tool("update_dashboard", {
+        "dashboard": dashboard_json,
+        "folderUid": "",
+        "overwrite": False
+    })
+    create_data = json.loads(create_result.content[0].text)
+    created_dashboard_uid = create_data["uid"]
+
+    # 5. Update the dashboard title
+    updated_title = f"Updated {title}"
+    title_prompt = f"Update the title of the Test Dashboard to {updated_title}. Search for the dashboard by title first."
+
+    messages = [
+        Message(role="system", content="You are a helpful assistant"),
+        Message(role="user", content=title_prompt),
+    ]
+
+    # 6. Search for the test dashboard
+    messages = await llm_tool_call_sequence(
+        model, messages, tools, mcp_client, "search_dashboards",
+        {"query": title}
+    )
+
+    # 7. Update the dashboard using patch operations
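+    # The expected tool call uses a JSON-Patch-style "replace" operation with a JSONPath-like path ("$.title")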
+    messages = await llm_tool_call_sequence(
+        model, messages, tools, mcp_client, "update_dashboard",
+        {
+            "uid": created_dashboard_uid,
+            "operations": [
+                {
+                    "op": "replace",
+                    "path": "$.title",
+                    "value": updated_title
+                }
+            ]
+        }
+    )
+
+    # 8. Final LLM response - just verify it completes successfully
+    response = await acompletion(model=model, messages=messages, tools=tools)
+    content = response.choices[0].message.content
+
+    # Test passes if we get here - the tool call sequence worked correctly
+    assert len(content) > 0, "LLM should provide a response after updating the dashboard"
+