diff --git a/src/helpers/isObjectEmpty.ts b/src/helpers/isObjectEmpty.ts
new file mode 100644
index 000000000..7584c2f51
--- /dev/null
+++ b/src/helpers/isObjectEmpty.ts
@@ -0,0 +1,27 @@
+/** Shape that `isObjectEmpty` narrows to: nullish, or an object without own enumerable string keys. */
+type EmptyObject = { [x: string]: never } | null | undefined;
+
+/**
+ * Reports whether `value` is `null`, `undefined`, or an object without any
+ * own enumerable string-keyed property.
+ *
+ * Inherited properties and symbol keys are not taken into account.
+ *
+ * @param value - The object to inspect.
+ * @returns `true` when `value` is nullish or has no own enumerable property.
+ */
+export function isObjectEmpty(value: object | null | undefined): value is EmptyObject {
+    if (!value) {
+        return true;
+    }
+
+    // `for...in` also walks the prototype chain, hence the own-property check;
+    // returning on the first hit avoids building the full key list.
+    for (const prop in value) {
+        if (Object.prototype.hasOwnProperty.call(value, prop)) {
+            return false;
+        }
+    }
+
+    return true;
+}
diff --git a/src/tools/mongodb/metadata/collectionSchema.ts b/src/tools/mongodb/metadata/collectionSchema.ts
index fa6ea3c0d..f03e9b9d1 100644
--- a/src/tools/mongodb/metadata/collectionSchema.ts
+++ b/src/tools/mongodb/metadata/collectionSchema.ts
@@ -1,23 +1,56 @@
 import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
 import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js";
-import type { ToolArgs, OperationType } from "../../tool.js";
+import type { ToolArgs, OperationType, ToolExecutionContext } from "../../tool.js";
 import { formatUntrustedData } from "../../tool.js";
 import { getSimplifiedSchema } from "mongodb-schema";
+import z from "zod";
+import { ONE_MB } from "../../../helpers/constants.js";
+import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorUntilMaxBytes.js";
+import { isObjectEmpty } from "../../../helpers/isObjectEmpty.js";
+
+/** Upper bound applied to the caller-supplied `sampleSize`, whatever value is requested. */
+const MAXIMUM_SAMPLE_SIZE_HARD_LIMIT = 50_000;
 
 export class CollectionSchemaTool extends MongoDBToolBase {
     public name = "collection-schema";
     protected description = "Describe the schema for a collection";
-    protected argsShape = DbOperationArgs;
+    protected argsShape = {
+        ...DbOperationArgs,
+        sampleSize: z.number().optional().default(50).describe("Number of documents to sample for schema inference"),
+        responseBytesLimit: z
+            .number()
+            .optional()
+            .default(ONE_MB)
+            .describe(
+                `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`
+            ),
+    };
 
     public operationType: OperationType = "metadata";
 
-    protected async execute({ database, collection }: ToolArgs): Promise {
+    /**
+     * Samples documents from the collection and reports the schema inferred from them.
+     *
+     * Documents are drawn with a `$sample` stage and gathered through `collectCursorUntilMaxBytesLimit`;
+     * when that helper reports `cappedBy`, the reply header notes that only a subset was used.
+     */
+    protected async execute(
+        { database, collection, sampleSize, responseBytesLimit }: ToolArgs,
+        { signal }: ToolExecutionContext
+    ): Promise<CallToolResult> {
         const provider = await this.ensureConnected();
-        const documents = await provider.find(database, collection, {}, { limit: 5 }).toArray();
+        // `$sample` expects a positive integer size, so truncate and clamp into [1, hard limit].
+        const boundedSampleSize = Math.max(1, Math.min(Math.trunc(sampleSize), MAXIMUM_SAMPLE_SIZE_HARD_LIMIT));
+        const cursor = provider.aggregate(database, collection, [{ $sample: { size: boundedSampleSize } }]);
+        const { cappedBy, documents } = await collectCursorUntilMaxBytesLimit({
+            cursor,
+            configuredMaxBytesPerQuery: this.config.maxBytesPerQuery,
+            toolResponseBytesLimit: responseBytesLimit,
+            abortSignal: signal,
+        });
         const schema = await getSimplifiedSchema(documents);
 
-        const fieldsCount = Object.entries(schema).length;
-        if (fieldsCount === 0) {
+        if (isObjectEmpty(schema)) {
             return {
                 content: [
                     {
@@ -28,11 +61,15 @@ export class CollectionSchemaTool extends MongoDBToolBase {
             };
         }
 
+        const fieldsCount = Object.keys(schema).length;
+        const header = `Found ${fieldsCount} fields in the schema for "${database}.${collection}"`;
+        const cappedWarning =
+            cappedBy !== undefined
+                ? `\nThe schema was inferred from a subset of documents due to the response size limit. (${cappedBy})`
+                : "";
+
         return {
-            content: formatUntrustedData(
-                `Found ${fieldsCount} fields in the schema for "${database}.${collection}"`,
-                JSON.stringify(schema)
-            ),
+            content: formatUntrustedData(`${header}${cappedWarning}`, JSON.stringify(schema)),
         };
     }
 }
diff --git a/tests/integration/common/isObjectEmpty.test.ts b/tests/integration/common/isObjectEmpty.test.ts
new file mode 100644
index 000000000..5c1b80571
--- /dev/null
+++ b/tests/integration/common/isObjectEmpty.test.ts
@@ -0,0 +1,24 @@
+import { isObjectEmpty } from "../../../src/helpers/isObjectEmpty.js";
+import { describe, expect, it } from "vitest";
+
+describe("isObjectEmpty", () => {
+    it("returns true for null", () => {
+        expect(isObjectEmpty(null)).toBe(true);
+    });
+
+    it("returns true for undefined", () => {
+        expect(isObjectEmpty(undefined)).toBe(true);
+    });
+
+    it("returns true for empty object", () => {
+        expect(isObjectEmpty({})).toBe(true);
+    });
+
+    it("returns false for object with properties", () => {
+        expect(isObjectEmpty({ a: 1 })).toBe(false);
+    });
+
+    it("returns true for object with only inherited properties", () => {
+        expect(isObjectEmpty(Object.create({ a: 1 }) as object)).toBe(true);
+    });
+});
diff --git a/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts b/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts
index 4130da1f8..47f117b28 100644
--- a/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts
+++ b/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts
@@ -15,12 +15,21 @@ import type { SimplifiedSchema } from "mongodb-schema";
 import { describe, expect, it } from "vitest";
 
 describeWithMongoDB("collectionSchema tool", (integration) => {
-    validateToolMetadata(
-        integration,
-        "collection-schema",
-        "Describe the schema for a collection",
-        databaseCollectionParameters
-    );
+    validateToolMetadata(integration, "collection-schema", "Describe the schema for a collection", [
+        ...databaseCollectionParameters,
+        {
+            name: "sampleSize",
+            type: "number",
+            description: "Number of documents to sample for schema inference",
+            required: false,
+        },
+        {
+            name: "responseBytesLimit",
+            type: "number",
+            description: `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`,
+            required: false,
+        },
+    ]);
 
     validateThrowsForInvalidArguments(integration, "collection-schema", databaseCollectionInvalidArgs);
 