Skip to content

Commit bc89c9d

Browse files
chore: extend accuracy tests with vector search
Extends the describeAccuracyTests interface to accept a user config and a cluster config, so that accuracy tests can be run against different cluster and user configurations.
1 parent a8c6425 commit bc89c9d

File tree

4 files changed

+167
-154
lines changed

4 files changed

+167
-154
lines changed

tests/accuracy/createIndex.test.ts

Lines changed: 135 additions & 130 deletions
Original file line number | Diff line number | Diff line change
@@ -1,140 +1,145 @@
11
import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
22
import { Matcher } from "./sdk/matcher.js";
33

4-
// TODO: supply this with a proper config API once we refactor describeAccuracyTests to support it
5-
process.env.MDB_VOYAGE_API_KEY = "valid-key";
6-
7-
describeAccuracyTests([
8-
{
9-
prompt: "Create an index that covers the following query on 'mflix.movies' namespace - { \"release_year\": 1992 }",
10-
expectedToolCalls: [
11-
{
12-
toolName: "create-index",
13-
parameters: {
14-
database: "mflix",
15-
collection: "movies",
16-
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
17-
definition: [
18-
{
19-
type: "classic",
20-
keys: {
21-
release_year: 1,
4+
describeAccuracyTests(
5+
[
6+
{
7+
prompt: "Create an index that covers the following query on 'mflix.movies' namespace - { \"release_year\": 1992 }",
8+
expectedToolCalls: [
9+
{
10+
toolName: "create-index",
11+
parameters: {
12+
database: "mflix",
13+
collection: "movies",
14+
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
15+
definition: [
16+
{
17+
type: "classic",
18+
keys: {
19+
release_year: 1,
20+
},
2221
},
23-
},
24-
],
22+
],
23+
},
2524
},
26-
},
27-
],
28-
},
29-
{
30-
prompt: "Create a text index on title field in 'mflix.movies' namespace",
31-
expectedToolCalls: [
32-
{
33-
toolName: "create-index",
34-
parameters: {
35-
database: "mflix",
36-
collection: "movies",
37-
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
38-
definition: [
39-
{
40-
type: "classic",
41-
keys: {
42-
title: "text",
25+
],
26+
},
27+
{
28+
prompt: "Create a text index on title field in 'mflix.movies' namespace",
29+
expectedToolCalls: [
30+
{
31+
toolName: "create-index",
32+
parameters: {
33+
database: "mflix",
34+
collection: "movies",
35+
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
36+
definition: [
37+
{
38+
type: "classic",
39+
keys: {
40+
title: "text",
41+
},
4342
},
44-
},
45-
],
43+
],
44+
},
4645
},
47-
},
48-
],
49-
},
50-
{
51-
prompt: "Create a vector search index on 'mydb.movies' namespace on the 'plotSummary' field. The index should use 1024 dimensions.",
52-
expectedToolCalls: [
53-
{
54-
toolName: "create-index",
55-
parameters: {
56-
database: "mydb",
57-
collection: "movies",
58-
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
59-
definition: [
60-
{
61-
type: "vectorSearch",
62-
fields: [
63-
{
64-
type: "vector",
65-
path: "plotSummary",
66-
numDimensions: 1024,
67-
},
68-
],
69-
},
70-
],
46+
],
47+
},
48+
{
49+
prompt: "Create a vector search index on 'mydb.movies' namespace on the 'plotSummary' field. The index should use 1024 dimensions.",
50+
expectedToolCalls: [
51+
{
52+
toolName: "create-index",
53+
parameters: {
54+
database: "mydb",
55+
collection: "movies",
56+
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
57+
definition: [
58+
{
59+
type: "vectorSearch",
60+
fields: [
61+
{
62+
type: "vector",
63+
path: "plotSummary",
64+
numDimensions: 1024,
65+
},
66+
],
67+
},
68+
],
69+
},
7170
},
72-
},
73-
],
74-
},
75-
{
76-
prompt: "Create a vector search index on 'mydb.movies' namespace with on the 'plotSummary' field and 'genre' field, both of which contain vector embeddings. Pick a sensible number of dimensions for a voyage 3.5 model.",
77-
expectedToolCalls: [
78-
{
79-
toolName: "create-index",
80-
parameters: {
81-
database: "mydb",
82-
collection: "movies",
83-
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
84-
definition: [
85-
{
86-
type: "vectorSearch",
87-
fields: [
88-
{
89-
type: "vector",
90-
path: "plotSummary",
91-
numDimensions: Matcher.number(
92-
(value) => value % 2 === 0 && value >= 256 && value <= 8192
93-
),
94-
similarity: Matcher.anyOf(Matcher.undefined, Matcher.string()),
95-
},
96-
{
97-
type: "vector",
98-
path: "genre",
99-
numDimensions: Matcher.number(
100-
(value) => value % 2 === 0 && value >= 256 && value <= 8192
101-
),
102-
similarity: Matcher.anyOf(Matcher.undefined, Matcher.string()),
103-
},
104-
],
105-
},
106-
],
71+
],
72+
},
73+
{
74+
prompt: "Create a vector search index on 'mydb.movies' namespace with on the 'plotSummary' field and 'genre' field, both of which contain vector embeddings. Pick a sensible number of dimensions for a voyage 3.5 model.",
75+
expectedToolCalls: [
76+
{
77+
toolName: "create-index",
78+
parameters: {
79+
database: "mydb",
80+
collection: "movies",
81+
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
82+
definition: [
83+
{
84+
type: "vectorSearch",
85+
fields: [
86+
{
87+
type: "vector",
88+
path: "plotSummary",
89+
numDimensions: Matcher.number(
90+
(value) => value % 2 === 0 && value >= 256 && value <= 8192
91+
),
92+
similarity: Matcher.anyOf(Matcher.undefined, Matcher.string()),
93+
},
94+
{
95+
type: "vector",
96+
path: "genre",
97+
numDimensions: Matcher.number(
98+
(value) => value % 2 === 0 && value >= 256 && value <= 8192
99+
),
100+
similarity: Matcher.anyOf(Matcher.undefined, Matcher.string()),
101+
},
102+
],
103+
},
104+
],
105+
},
107106
},
108-
},
109-
],
110-
},
111-
{
112-
prompt: "Create a vector search index on 'mydb.movies' namespace where the 'plotSummary' field is indexed as a 1024-dimensional vector and the 'releaseDate' field is indexed as a regular field.",
113-
expectedToolCalls: [
114-
{
115-
toolName: "create-index",
116-
parameters: {
117-
database: "mydb",
118-
collection: "movies",
119-
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
120-
definition: [
121-
{
122-
type: "vectorSearch",
123-
fields: [
124-
{
125-
type: "vector",
126-
path: "plotSummary",
127-
numDimensions: 1024,
128-
},
129-
{
130-
type: "filter",
131-
path: "releaseDate",
132-
},
133-
],
134-
},
135-
],
107+
],
108+
},
109+
{
110+
prompt: "Create a vector search index on 'mydb.movies' namespace where the 'plotSummary' field is indexed as a 1024-dimensional vector and the 'releaseDate' field is indexed as a regular field.",
111+
expectedToolCalls: [
112+
{
113+
toolName: "create-index",
114+
parameters: {
115+
database: "mydb",
116+
collection: "movies",
117+
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
118+
definition: [
119+
{
120+
type: "vectorSearch",
121+
fields: [
122+
{
123+
type: "vector",
124+
path: "plotSummary",
125+
numDimensions: 1024,
126+
},
127+
{
128+
type: "filter",
129+
path: "releaseDate",
130+
},
131+
],
132+
},
133+
],
134+
},
136135
},
137-
},
138-
],
139-
},
140-
]);
136+
],
137+
},
138+
],
139+
{
140+
userConfig: { voyageApiKey: "valid-key" },
141+
clusterConfig: {
142+
search: true,
143+
},
144+
}
145+
);

tests/accuracy/sdk/accuracyTestingClient.ts

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"
66
import { MCP_SERVER_CLI_SCRIPT } from "./constants.js";
77
import type { LLMToolCall } from "./accuracyResultStorage/resultStorage.js";
88
import type { VercelMCPClient, VercelMCPClientTools } from "./agent.js";
9+
import type { UserConfig } from "../../../src/lib.js";
910

1011
type ToolResultGeneratorFn = (parameters: Record<string, unknown>) => CallToolResult | Promise<CallToolResult>;
1112
export type MockedTools = Record<string, ToolResultGeneratorFn>;
@@ -81,18 +82,13 @@ export class AccuracyTestingClient {
8182

8283
static async initializeClient(
8384
mdbConnectionString: string,
84-
atlasApiClientId?: string,
85-
atlasApiClientSecret?: string,
86-
voyageApiKey?: string
85+
userConfig: Partial<{ [k in keyof UserConfig]: string }> = {}
8786
): Promise<AccuracyTestingClient> {
88-
const args = [
89-
MCP_SERVER_CLI_SCRIPT,
90-
"--connectionString",
91-
mdbConnectionString,
92-
...(atlasApiClientId ? ["--apiClientId", atlasApiClientId] : []),
93-
...(atlasApiClientSecret ? ["--apiClientSecret", atlasApiClientSecret] : []),
94-
...(voyageApiKey ? ["--voyageApiKey", voyageApiKey] : []),
95-
];
87+
const additionalArgs = Object.entries(userConfig).flatMap(([key, value]) => {
88+
return [`--${key}`, value];
89+
});
90+
91+
const args = [MCP_SERVER_CLI_SCRIPT, "--connectionString", mdbConnectionString, ...additionalArgs];
9692

9793
const clientTransport = new StdioClientTransport({
9894
command: process.execPath,

tests/accuracy/sdk/describeAccuracyTests.ts

Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,11 @@ import type { AccuracyResultStorage, ExpectedToolCall, LLMToolCall } from "./acc
1010
import { getAccuracyResultStorage } from "./accuracyResultStorage/getAccuracyResultStorage.js";
1111
import { getCommitSHA } from "./gitInfo.js";
1212
import type { MongoClient } from "mongodb";
13+
import type { UserConfig } from "../../../src/lib.js";
14+
import {
15+
MongoDBClusterProcess,
16+
type MongoClusterConfiguration,
17+
} from "../../integration/tools/mongodb/mongodbClusterProcess.js";
1318

1419
export interface AccuracyTestConfig {
1520
/** The prompt to be provided to LLM for evaluation. */
@@ -48,7 +53,13 @@ export interface AccuracyTestConfig {
4853
) => Promise<number> | number;
4954
}
5055

51-
export function describeAccuracyTests(accuracyTestConfigs: AccuracyTestConfig[]): void {
56+
export function describeAccuracyTests(
57+
accuracyTestConfigs: AccuracyTestConfig[],
58+
{
59+
userConfig: partialUserConfig,
60+
clusterConfig,
61+
}: { userConfig?: Partial<{ [k in keyof UserConfig]: string }>; clusterConfig?: MongoClusterConfiguration } = {}
62+
): void {
5263
if (!process.env.MDB_ACCURACY_RUN_ID) {
5364
throw new Error("MDB_ACCURACY_RUN_ID env variable is required for accuracy test runs!");
5465
}
@@ -58,17 +69,22 @@ export function describeAccuracyTests(accuracyTestConfigs: AccuracyTestConfig[])
5869
throw new Error("No models available to test. Ensure that the API keys are properly setup!");
5970
}
6071

61-
const eachModel = describe.each(models);
72+
const shouldSkip = clusterConfig && !MongoDBClusterProcess.isConfigurationSupportedInCurrentEnv(clusterConfig);
73+
74+
const eachModel = describe.skipIf(shouldSkip).each(models);
6275

6376
eachModel(`$displayName`, function (model) {
6477
const configsWithDescriptions = getConfigsWithDescriptions(accuracyTestConfigs);
6578
const accuracyRunId = `${process.env.MDB_ACCURACY_RUN_ID}`;
66-
const mdbIntegration = setupMongoDBIntegrationTest();
79+
const mdbIntegration = setupMongoDBIntegrationTest(clusterConfig);
6780
const { populateTestData, cleanupTestDatabases } = prepareTestData(mdbIntegration);
6881

69-
const atlasApiClientId = process.env.MDB_MCP_API_CLIENT_ID;
70-
const atlasApiClientSecret = process.env.MDB_MCP_API_CLIENT_SECRET;
71-
const voyageApiKey = process.env.MDB_VOYAGE_API_KEY;
82+
const userConfig: Partial<{ [k in keyof UserConfig]: string }> = {
83+
apiClientId: process.env.MDB_MCP_API_CLIENT_ID,
84+
apiClientSecret: process.env.MDB_MCP_API_CLIENT_SECRET,
85+
voyageApiKey: process.env.MDB_VOYAGE_API_KEY,
86+
...partialUserConfig,
87+
};
7288

7389
let commitSHA: string;
7490
let accuracyResultStorage: AccuracyResultStorage;
@@ -83,12 +99,7 @@ export function describeAccuracyTests(accuracyTestConfigs: AccuracyTestConfig[])
8399
commitSHA = retrievedCommitSHA;
84100

85101
accuracyResultStorage = getAccuracyResultStorage();
86-
testMCPClient = await AccuracyTestingClient.initializeClient(
87-
mdbIntegration.connectionString(),
88-
atlasApiClientId,
89-
atlasApiClientSecret,
90-
voyageApiKey
91-
);
102+
testMCPClient = await AccuracyTestingClient.initializeClient(mdbIntegration.connectionString(), userConfig);
92103
agent = getVercelToolCallingAgent();
93104
});
94105

tests/integration/tools/mongodb/mongodbClusterProcess.ts

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,8 @@ export class MongoDBClusterProcess {
2727

2828
return new MongoDBClusterProcess(
2929
() => runningContainer.stop(),
30-
() => `mongodb://${runningContainer.getHost()}:${runningContainer.getMappedPort(27017)}`
30+
() =>
31+
`mongodb://${runningContainer.getHost()}:${runningContainer.getMappedPort(27017)}/?directConnection=true`
3132
);
3233
} else if (MongoDBClusterProcess.isMongoRunnerOptions(config)) {
3334
const { downloadOptions, serverArgs } = config;

0 commit comments

Comments
 (0)