#!/usr/bin/env python3
"""
Determine which integration test suites to run based on changed files in a PR.

This script analyzes git diff to identify changed files and maps them to test suites.
Critical paths trigger running all tests for safety, as does any changed SDK source
file that has no explicit mapping.

The result is printed to stdout as a JSON object with the keys ``rest_sync``,
``rest_asyncio``, ``grpc_sync`` and ``admin``. Nothing else is written to stdout or
stderr on the success path, so the output can be redirected straight into a file.
"""

import argparse
import json
import subprocess
import sys
from typing import Dict, List, Set


# Define all possible test suites organized by job type
ALL_REST_SYNC_SUITES = [
    "control/serverless",
    "control/resources/index",
    "control/resources/collections",
    "inference/sync",
    "plugins",
    "data",
]

ALL_REST_ASYNCIO_SUITES = [
    "control_asyncio/resources/index",
    "control_asyncio/*.py",
    "inference/asyncio",
    "data_asyncio",
]

ALL_GRPC_SYNC_SUITES = ["data", "data_grpc_futures"]

ALL_ADMIN_SUITES = ["admin"]

# Job type -> every suite that job can run. Insertion order fixes the key order of
# the emitted JSON object.
_SUITES_BY_JOB = {
    "rest_sync": ALL_REST_SYNC_SUITES,
    "rest_asyncio": ALL_REST_ASYNCIO_SUITES,
    "grpc_sync": ALL_GRPC_SYNC_SUITES,
    "admin": ALL_ADMIN_SUITES,
}

# Only files under this prefix are considered when selecting suites.
_SOURCE_ROOT = "pinecone/"

# Critical paths that require running all tests
CRITICAL_PATHS = [
    "pinecone/config/",
    "pinecone/core/",
    "pinecone/openapi_support/",
    "pinecone/utils/",
    "pinecone/exceptions/",  # Used across all test suites for error handling
    "pinecone/pinecone.py",
    "pinecone/pinecone_asyncio.py",
    "pinecone/pinecone_interface_asyncio.py",  # Core asyncio interface
    "pinecone/legacy_pinecone_interface.py",  # Legacy interface affects many tests
    "pinecone/deprecation_warnings.py",  # Affects all code paths
    "pinecone/__init__.py",
    "pinecone/__init__.pyi",
]

# Path to test suite mappings
# Format: (path_pattern, [list of test suites])
PATH_MAPPINGS = [
    # db_control mappings
    (
        "pinecone/db_control/",
        [
            "control/serverless",
            "control/resources/index",
            "control/resources/collections",
            "control_asyncio/resources/index",
            "control_asyncio/*.py",
        ],
    ),
    # db_data mappings
    ("pinecone/db_data/", ["data", "data_asyncio", "data_grpc_futures"]),
    # inference mappings
    ("pinecone/inference/", ["inference/sync", "inference/asyncio"]),
    # admin mappings
    ("pinecone/admin/", ["admin"]),
    # grpc mappings
    (
        "pinecone/grpc/",
        [
            "data_grpc_futures",
            "data",  # grpc affects data tests too
        ],
    ),
    # plugin mappings
    ("pinecone/deprecated_plugins.py", ["plugins"]),
    ("pinecone/langchain_import_warnings.py", ["plugins"]),
]


def _git_changed_names(*diff_args: str) -> Set[str]:
    """Run ``git diff --name-only <diff_args>`` and return the non-blank output lines."""
    result = subprocess.run(
        ["git", "diff", "--name-only", *diff_args],
        capture_output=True,
        text=True,
        check=True,
    )
    return {line.strip() for line in result.stdout.splitlines() if line.strip()}


def get_changed_files(base_ref: str = "main") -> Set[str]:
    """Get the set of changed files compared to the base branch.

    Args:
        base_ref: Name of the base branch; it is compared as ``origin/<base_ref>``.

    Returns:
        The changed file paths reported by git. An empty set is returned when
        nothing changed or when every git invocation failed; callers treat an
        empty set as "run the full suite".
    """
    attempts = (
        # For PRs: everything on HEAD since it diverged from the base branch.
        (f"origin/{base_ref}...HEAD",),
        # Fallback for local testing (e.g. no origin remote): diff against HEAD~1.
        ("HEAD~1",),
    )
    for diff_args in attempts:
        try:
            return _git_changed_names(*diff_args)
        except (subprocess.CalledProcessError, OSError):
            # CalledProcessError: git exited non-zero (unknown ref, not a repo, ...).
            # OSError: the git executable could not be started at all.
            continue

    # If git commands fail, return empty set (will trigger full suite)
    return set()


def is_critical_path(file_path: str) -> bool:
    """Check if a file path is in a critical area that requires all tests."""
    return file_path.startswith(tuple(CRITICAL_PATHS))


def map_file_to_test_suites(file_path: str) -> Set[str]:
    """Map a single file path to its relevant test suites.

    Returns the union of the suites of every ``PATH_MAPPINGS`` entry whose path
    pattern is a prefix of ``file_path``; an empty set when none matches.
    """
    suites: Set[str] = set()
    for path_pattern, test_suites in PATH_MAPPINGS:
        if file_path.startswith(path_pattern):
            suites.update(test_suites)
    return suites


def _all_suites() -> Dict[str, List[str]]:
    """Return every suite of every job type, as fresh lists callers may mutate."""
    return {job: list(suites) for job, suites in _SUITES_BY_JOB.items()}


def determine_test_suites(
    changed_files: Set[str], run_all: bool = False
) -> Dict[str, List[str]]:
    """
    Determine which test suites to run based on changed files.

    All suites are selected when any of the following holds:

    * ``run_all`` is true or ``changed_files`` is empty;
    * a changed file lies on one of ``CRITICAL_PATHS``;
    * a changed file under ``pinecone/`` has no entry in ``PATH_MAPPINGS`` (its
      impact is unknown, so it must not be dropped just because some other
      changed file happened to match a mapping);
    * no changed file selected any suite.

    Files outside ``pinecone/`` never select a suite on their own.

    Args:
        changed_files: Paths of the changed files, relative to the repository root.
        run_all: Force selecting every suite.

    Returns:
        A dict with keys: rest_sync, rest_asyncio, grpc_sync, admin.
        Each value is a list of test suite names to run. The lists are new
        objects, never the module-level ``ALL_*`` constants themselves.
    """
    if run_all or not changed_files:
        return _all_suites()

    if any(is_critical_path(path) for path in changed_files):
        return _all_suites()

    selected: Set[str] = set()
    for file_path in changed_files:
        # Only SDK sources take part in the selection.
        if not file_path.startswith(_SOURCE_ROOT):
            continue

        suites = map_file_to_test_suites(file_path)
        if not suites:
            # Unmapped SDK file: fall back to everything rather than guess.
            return _all_suites()
        selected |= suites

    # Categorize suites by job type; a suite may belong to several jobs.
    by_job = {
        job: sorted(selected.intersection(job_suites))
        for job, job_suites in _SUITES_BY_JOB.items()
    }

    # If no tests matched, run all (safety fallback)
    if not any(by_job.values()):
        return _all_suites()

    return by_job


def main():
    """Parse the command line, print the selected suites as JSON, set the exit code."""
    parser = argparse.ArgumentParser(
        description="Determine test suites to run based on changed files"
    )
    parser.add_argument(
        "--base-ref", default="main", help="Base branch/ref to compare against (default: main)"
    )
    parser.add_argument("--run-all", action="store_true", help="Force running all test suites")
    parser.add_argument(
        "--output-format",
        choices=["json", "json-pretty"],
        default="json",
        help="Output format (default: json)",
    )

    args = parser.parse_args()

    changed_files = get_changed_files(args.base_ref)
    test_suites = determine_test_suites(changed_files, run_all=args.run_all)

    # Output as JSON
    indent = 2 if args.output_format == "json-pretty" else None
    print(json.dumps(test_suites, indent=indent))

    # Exit with non-zero if no test suites selected (shouldn't happen with safety fallback)
    if not any(test_suites.values()):
        sys.exit(1)


if __name__ == "__main__":
    main()
]; then + base_ref="${{ github.event.pull_request.base.ref }}" + else + base_ref="main" + fi + + if [ "$run_all" = "true" ]; then + echo "Running all tests (manual override)" + python3 .github/scripts/determine-test-suites.py --run-all --output-format json > test_suites.json + else + echo "Determining test suites based on changed files (base: $base_ref)" + if ! python3 .github/scripts/determine-test-suites.py --base-ref "$base_ref" --output-format json > test_suites.json 2>&1; then + echo "Script failed, falling back to all tests" + python3 .github/scripts/determine-test-suites.py --run-all --output-format json > test_suites.json + fi + fi + + # Validate JSON was created + if [ ! -f test_suites.json ] || ! jq empty test_suites.json 2>/dev/null; then + echo "Error: Failed to generate valid test_suites.json, falling back to all tests" + python3 .github/scripts/determine-test-suites.py --run-all --output-format json > test_suites.json + fi + + # Extract each job type's suites and set as outputs + rest_sync=$(jq -c '.rest_sync' test_suites.json) + rest_asyncio=$(jq -c '.rest_asyncio' test_suites.json) + grpc_sync=$(jq -c '.grpc_sync' test_suites.json) + admin=$(jq -c '.admin' test_suites.json) + + echo "rest_sync_suites=$rest_sync" >> $GITHUB_OUTPUT + echo "rest_asyncio_suites=$rest_asyncio" >> $GITHUB_OUTPUT + echo "grpc_sync_suites=$grpc_sync" >> $GITHUB_OUTPUT + echo "admin_suites=$admin" >> $GITHUB_OUTPUT + + echo "Selected test suites:" + echo "REST sync: $rest_sync" + echo "REST asyncio: $rest_asyncio" + echo "gRPC sync: $grpc_sync" + echo "Admin: $admin" + create-project: uses: './.github/workflows/project-setup.yaml' secrets: inherit @@ -41,14 +103,20 @@ jobs: - unit-tests integration-tests: + if: always() && (needs.unit-tests.result == 'success' && needs.create-project.result == 'success' && needs.determine-test-suites.result == 'success') uses: './.github/workflows/testing-integration.yaml' secrets: inherit needs: - unit-tests - create-project + - 
determine-test-suites with: encrypted_project_api_key: ${{ needs.create-project.outputs.encrypted_project_api_key }} python_versions_json: '["3.13", "3.9"]' + rest_sync_suites_json: ${{ needs.determine-test-suites.outputs.rest_sync_suites || '' }} + rest_asyncio_suites_json: ${{ needs.determine-test-suites.outputs.rest_asyncio_suites || '' }} + grpc_sync_suites_json: ${{ needs.determine-test-suites.outputs.grpc_sync_suites || '' }} + admin_suites_json: ${{ needs.determine-test-suites.outputs.admin_suites || '' }} cleanup-project: if: ${{ always() }} diff --git a/.github/workflows/testing-integration.yaml b/.github/workflows/testing-integration.yaml index e267792a..7ea013c8 100644 --- a/.github/workflows/testing-integration.yaml +++ b/.github/workflows/testing-integration.yaml @@ -8,6 +8,22 @@ on: python_versions_json: required: true type: string + rest_sync_suites_json: + required: false + type: string + description: 'JSON array of REST sync test suites to run (if not provided, runs all)' + rest_asyncio_suites_json: + required: false + type: string + description: 'JSON array of REST asyncio test suites to run (if not provided, runs all)' + grpc_sync_suites_json: + required: false + type: string + description: 'JSON array of gRPC sync test suites to run (if not provided, runs all)' + admin_suites_json: + required: false + type: string + description: 'JSON array of admin test suites to run (if not provided, runs all)' permissions: {} @@ -15,20 +31,13 @@ jobs: rest-sync: name: rest ${{ matrix.python_version }} ${{ matrix.test_suite }} runs-on: ubuntu-latest + if: ${{ inputs.rest_sync_suites_json == '' || (inputs.rest_sync_suites_json != '' && fromJson(inputs.rest_sync_suites_json)[0] != null) }} strategy: fail-fast: false max-parallel: 4 matrix: python_version: ${{ fromJson(inputs.python_versions_json) }} - test_suite: - - control/serverless - - control/resources/index - - control/resources/collections - # - control/resources/backup - - inference/sync - - plugins - - 
data - # - control/resources/restore_job # Backup tests must run before these + test_suite: ${{ inputs.rest_sync_suites_json != '' && fromJson(inputs.rest_sync_suites_json) || fromJson('["control/serverless", "control/resources/index", "control/resources/collections", "inference/sync", "plugins", "data"]') }} steps: - uses: actions/checkout@v4 - name: Setup Poetry @@ -47,18 +56,13 @@ jobs: rest-asyncio: name: asyncio ${{ matrix.python_version }} ${{ matrix.test_suite }} runs-on: ubuntu-latest + if: ${{ inputs.rest_asyncio_suites_json == '' || (inputs.rest_asyncio_suites_json != '' && fromJson(inputs.rest_asyncio_suites_json)[0] != null) }} strategy: fail-fast: false max-parallel: 4 matrix: python_version: ${{ fromJson(inputs.python_versions_json) }} - test_suite: - - control_asyncio/resources/index - # - control_asyncio/resources/backup - - control_asyncio/*.py - - inference/asyncio - - data_asyncio - # - control_asyncio/resources/restore_job # Backup tests must run before these + test_suite: ${{ inputs.rest_asyncio_suites_json != '' && fromJson(inputs.rest_asyncio_suites_json) || fromJson('["control_asyncio/resources/index", "control_asyncio/*.py", "inference/asyncio", "data_asyncio"]') }} steps: - uses: actions/checkout@v4 - name: Setup Poetry @@ -76,13 +80,12 @@ jobs: grpc-sync: name: grpc sync ${{ matrix.python_version }} ${{ matrix.test_suite }} runs-on: ubuntu-latest + if: ${{ inputs.grpc_sync_suites_json == '' || (inputs.grpc_sync_suites_json != '' && fromJson(inputs.grpc_sync_suites_json)[0] != null) }} strategy: fail-fast: false matrix: python_version: ${{ fromJson(inputs.python_versions_json) }} - test_suite: - - data - - data_grpc_futures + test_suite: ${{ inputs.grpc_sync_suites_json != '' && fromJson(inputs.grpc_sync_suites_json) || fromJson('["data", "data_grpc_futures"]') }} steps: - uses: actions/checkout@v4 - name: Setup Poetry @@ -101,12 +104,12 @@ jobs: admin: name: admin ${{ matrix.python_version }} runs-on: ubuntu-latest + if: ${{ 
inputs.admin_suites_json == '' || (inputs.admin_suites_json != '' && fromJson(inputs.admin_suites_json)[0] != null) }} strategy: fail-fast: false matrix: python_version: ${{ fromJson(inputs.python_versions_json) }} - test_suite: - - admin + test_suite: ${{ inputs.admin_suites_json != '' && fromJson(inputs.admin_suites_json) || fromJson('["admin"]') }} steps: - uses: actions/checkout@v4 - name: Setup Poetry diff --git a/.gitignore b/.gitignore index 7ebc5b82..b0622c3f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ scratch # misc. *.model *pdf +PR_DESCRIPTION.md tmp *swp