11import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js" ;
22import { DbOperationArgs , MongoDBToolBase } from "../mongodbTool.js" ;
3- import type { ToolArgs , OperationType } from "../../tool.js" ;
3+ import type { ToolArgs , OperationType , ToolExecutionContext } from "../../tool.js" ;
44import { formatUntrustedData } from "../../tool.js" ;
55import { getSimplifiedSchema } from "mongodb-schema" ;
6+ import z from "zod" ;
7+ import { ONE_MB } from "../../../helpers/constants.js" ;
8+ import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorUntilMaxBytes.js" ;
9+ import { isObjectEmpty } from "../../../helpers/isObjectEmpty.js" ;
10+
// Upper bound for the $sample stage size: execute() passes
// Math.min(sampleSize, MAXIMUM_SAMPLE_SIZE_HARD_LIMIT) to the aggregation,
// so a caller-supplied sampleSize can never exceed this value.
11+ const MAXIMUM_SAMPLE_SIZE_HARD_LIMIT = 50_000 ;
612
// Tool registered as "collection-schema": samples documents from a collection
// with an aggregation $sample stage and reports the simplified schema that
// getSimplifiedSchema() derives from the collected documents.
713export class CollectionSchemaTool extends MongoDBToolBase {
814 public name = "collection-schema" ;
915 protected description = "Describe the schema for a collection" ;
10- protected argsShape = DbOperationArgs ;
// Arguments: everything in DbOperationArgs plus two optional numbers,
// sampleSize (defaults to 50) and responseBytesLimit (defaults to ONE_MB).
16+ protected argsShape = {
17+ ...DbOperationArgs ,
// NOTE(review): z.number() alone also admits 0, negative and fractional values,
// and execute() only clamps the upper bound with Math.min. $sample presumably
// requires a positive integer size -- confirm, and consider .int().positive().
18+ sampleSize : z . number ( ) . optional ( ) . default ( 50 ) . describe ( "Number of documents to sample for schema inference" ) ,
19+ responseBytesLimit : z
20+ . number ( )
21+ . optional ( )
22+ . default ( ONE_MB )
23+ . describe (
24+ `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`
25+ ) ,
26+ } ;
1127
1228 public operationType : OperationType = "metadata" ;
1329
14- protected async execute ( { database, collection } : ToolArgs < typeof DbOperationArgs > ) : Promise < CallToolResult > {
// Runs the tool.
//   1st parameter: parsed tool arguments (database, collection, sampleSize,
//                  responseBytesLimit).
//   2nd parameter: execution context; only its `signal` is used, forwarded as
//                  the abort signal of the cursor collection helper.
// Resolves to a CallToolResult: an early result when the inferred schema is
// empty, otherwise a header (field count and namespace, plus a warning when
// the collection helper reported `cappedBy`) and the schema as JSON, both
// passed through formatUntrustedData().
30+ protected async execute (
31+ { database, collection, sampleSize, responseBytesLimit } : ToolArgs < typeof this . argsShape > ,
32+ { signal } : ToolExecutionContext
33+ ) : Promise < CallToolResult > {
1534 const provider = await this . ensureConnected ( ) ;
16- const documents = await provider . find ( database , collection , { } , { limit : 5 } ) . toArray ( ) ;
// Sample at most MAXIMUM_SAMPLE_SIZE_HARD_LIMIT documents, whatever sampleSize asks for.
35+ const cursor = provider . aggregate ( database , collection , [
36+ { $sample : { size : Math . min ( sampleSize , MAXIMUM_SAMPLE_SIZE_HARD_LIMIT ) } } ,
37+ ] ) ;
// The helper receives both the configured maxBytesPerQuery and the per-call
// responseBytesLimit. Presumably it stops reading once the effective byte
// limit is reached and reports the reason in `cappedBy` -- confirm in
// helpers/collectCursorUntilMaxBytes.
38+ const { cappedBy, documents } = await collectCursorUntilMaxBytesLimit ( {
39+ cursor,
40+ configuredMaxBytesPerQuery : this . config . maxBytesPerQuery ,
41+ toolResponseBytesLimit : responseBytesLimit ,
42+ abortSignal : signal ,
43+ } ) ;
1744 const schema = await getSimplifiedSchema ( documents ) ;
1845
19- const fieldsCount = Object . entries ( schema ) . length ;
20- if ( fieldsCount === 0 ) {
// Early return when no schema fields were inferred. The content of this
// result is not visible in this view, so it is not described here.
46+ if ( isObjectEmpty ( schema ) ) {
2147 return {
2248 content : [
2349 {
@@ -28,11 +54,15 @@ export class CollectionSchemaTool extends MongoDBToolBase {
2854 } ;
2955 }
3056
// Build the human-readable description: the number of top-level schema keys
// and the namespace, followed by a warning only when `cappedBy` is defined.
57+ const fieldsCount = Object . keys ( schema ) . length ;
58+ const header = `Found ${ fieldsCount } fields in the schema for "${ database } .${ collection } "` ;
59+ const cappedWarning =
60+ cappedBy !== undefined
61+ ? `\nThe schema was inferred from a subset of documents due to the response size limit. (${ cappedBy } )`
62+ : "" ;
63+
3164 return {
32- content : formatUntrustedData (
33- `Found ${ fieldsCount } fields in the schema for "${ database } .${ collection } "` ,
34- JSON . stringify ( schema )
35- ) ,
// The schema JSON derives from stored documents, so it is handed to
// formatUntrustedData() together with the description rather than returned raw.
65+ content : formatUntrustedData ( `${ header } ${ cappedWarning } ` , JSON . stringify ( schema ) ) ,
3666 } ;
3767 }
3868}
0 commit comments