Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 231 additions & 0 deletions .github/scripts/determine-test-suites.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
#!/usr/bin/env python3
"""
Determine which integration test suites to run based on changed files in a PR.
This script analyzes git diff to identify changed files and maps them to test suites.
Critical paths trigger running all tests for safety.
"""

import json
import subprocess
import sys
from typing import Set


# Integration test suites, grouped by the CI job type that runs them.
ALL_REST_SYNC_SUITES = [
    "control/serverless",
    "control/resources/index",
    "control/resources/collections",
    "inference/sync",
    "plugins",
    "data",
]

ALL_REST_ASYNCIO_SUITES = [
    "control_asyncio/resources/index",
    "control_asyncio/*.py",
    "inference/asyncio",
    "data_asyncio",
]

ALL_GRPC_SYNC_SUITES = [
    "data",
    "data_grpc_futures",
]

ALL_ADMIN_SUITES = [
    "admin",
]

# Path prefixes whose modification forces every suite to run.
# Entries ending in "/" cover a whole package; the others name single modules.
CRITICAL_PATHS = [
    # Shared packages
    "pinecone/config/",
    "pinecone/core/",
    "pinecone/openapi_support/",
    "pinecone/utils/",
    "pinecone/exceptions/",  # error handling used across all test suites
    # Client entry points and interfaces
    "pinecone/pinecone.py",
    "pinecone/pinecone_asyncio.py",
    "pinecone/pinecone_interface_asyncio.py",  # core asyncio interface
    "pinecone/legacy_pinecone_interface.py",  # legacy interface affects many tests
    "pinecone/deprecation_warnings.py",  # affects all code paths
    # Package root
    "pinecone/__init__.py",
    "pinecone/__init__.pyi",
]

# Source-path prefix -> suites exercised by code under that prefix.
# Each entry is a (path_prefix, [suite, ...]) pair.
PATH_MAPPINGS = [
    # Control plane (sync and asyncio)
    (
        "pinecone/db_control/",
        [
            "control/serverless",
            "control/resources/index",
            "control/resources/collections",
            "control_asyncio/resources/index",
            "control_asyncio/*.py",
        ],
    ),
    # Data plane
    (
        "pinecone/db_data/",
        ["data", "data_asyncio", "data_grpc_futures"],
    ),
    # Inference
    (
        "pinecone/inference/",
        ["inference/sync", "inference/asyncio"],
    ),
    # Admin
    (
        "pinecone/admin/",
        ["admin"],
    ),
    # gRPC transport; also affects the plain data tests
    (
        "pinecone/grpc/",
        ["data_grpc_futures", "data"],
    ),
    # Plugin-related modules
    (
        "pinecone/deprecated_plugins.py",
        ["plugins"],
    ),
    (
        "pinecone/langchain_import_warnings.py",
        ["plugins"],
    ),
]


def _git_diff_names(revision: str) -> Set[str]:
    """Return the non-empty paths printed by ``git diff --name-only <revision>``.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
        OSError: if the git executable cannot be launched at all.
    """
    result = subprocess.run(
        ["git", "diff", "--name-only", revision],
        capture_output=True,
        text=True,
        check=True,
    )
    return {line.strip() for line in result.stdout.strip().split("\n") if line.strip()}


def get_changed_files(base_ref: str = "main") -> Set[str]:
    """Get the set of changed files compared to the base branch.

    Two comparisons are attempted in order:

    1. ``origin/<base_ref>...HEAD`` -- the normal pull-request case.
    2. ``HEAD~1`` -- a fallback for local testing.

    Args:
        base_ref: Name of the base branch; it is looked up under ``origin/``.

    Returns:
        The changed paths from the first comparison that succeeds. An empty
        set is returned when neither comparison can be run, which the caller
        treats as "run the full suite".
    """
    for revision in (f"origin/{base_ref}...HEAD", "HEAD~1"):
        try:
            return _git_diff_names(revision)
        except (subprocess.CalledProcessError, OSError):
            # CalledProcessError: git ran but rejected the revision (unknown
            # ref, not a repository, ...). OSError: git itself could not be
            # started (e.g. not installed). Either way, try the next option.
            continue
    return set()


def is_critical_path(file_path: str) -> bool:
    """Tell whether ``file_path`` begins with any entry of ``CRITICAL_PATHS``."""
    # str.startswith accepts a tuple and is true if any one prefix matches.
    critical_prefixes = tuple(CRITICAL_PATHS)
    return file_path.startswith(critical_prefixes)


def map_file_to_test_suites(file_path: str) -> Set[str]:
    """Collect the suites of every ``PATH_MAPPINGS`` entry whose prefix matches.

    Returns an empty set when ``file_path`` starts with none of the prefixes.
    """
    return {
        suite
        for prefix, mapped_suites in PATH_MAPPINGS
        if file_path.startswith(prefix)
        for suite in mapped_suites
    }


def _all_test_suites() -> dict:
    """Return a fresh selection containing every suite of every job type.

    The lists are copies, so callers can modify the result without altering
    the module-level ``ALL_*_SUITES`` constants.
    """
    return {
        "rest_sync": list(ALL_REST_SYNC_SUITES),
        "rest_asyncio": list(ALL_REST_ASYNCIO_SUITES),
        "grpc_sync": list(ALL_GRPC_SYNC_SUITES),
        "admin": list(ALL_ADMIN_SUITES),
    }


def determine_test_suites(changed_files: Set[str], run_all: bool = False) -> dict:
    """
    Determine which test suites to run based on changed files.

    Every suite is selected when any of the following holds:

    * ``run_all`` is true or ``changed_files`` is empty;
    * a changed file is on a critical path (see ``is_critical_path``);
    * a changed file under ``pinecone/`` has no entry in the path mappings,
      so its impact on the suites is unknown;
    * no suite was selected by the mappings at all.

    Otherwise only the suites mapped from the changed ``pinecone/`` files are
    selected. Changed files outside ``pinecone/`` never select suites on
    their own.

    Args:
        changed_files: Repository-relative paths of the changed files.
        run_all: Force selection of every suite.

    Returns:
        A dict with keys ``rest_sync``, ``rest_asyncio``, ``grpc_sync`` and
        ``admin``. Each value is a list of test suite names to run; partial
        selections are sorted alphabetically.
    """
    if run_all or not changed_files:
        return _all_test_suites()

    if any(is_critical_path(path) for path in changed_files):
        return _all_test_suites()

    selected: Set[str] = set()
    for file_path in changed_files:
        # Only library sources take part in the mapping.
        if not file_path.startswith("pinecone/"):
            continue

        suites = map_file_to_test_suites(file_path)
        if not suites:
            # A library file without a mapping could affect anything; do not
            # let a mapped sibling file mask it -- fall back to the full run.
            return _all_test_suites()
        selected.update(suites)

    # A suite may belong to several job types (e.g. "data" is both REST sync
    # and gRPC sync), so each job type takes its own intersection.
    selection = {
        "rest_sync": sorted(selected.intersection(ALL_REST_SYNC_SUITES)),
        "rest_asyncio": sorted(selected.intersection(ALL_REST_ASYNCIO_SUITES)),
        "grpc_sync": sorted(selected.intersection(ALL_GRPC_SYNC_SUITES)),
        "admin": sorted(selected.intersection(ALL_ADMIN_SUITES)),
    }

    # Safety fallback: nothing matched (e.g. only non-library files changed).
    if not any(selection.values()):
        return _all_test_suites()

    return selection


def main():
    """Parse the command line, select the test suites and print them as JSON.

    Command-line options:
        --base-ref       Base branch/ref to compare against (default: main).
        --run-all        Select every suite; the git diff is skipped entirely.
        --output-format  ``json`` (compact, default) or ``json-pretty``.

    Exits with status 1 when the resulting selection is completely empty.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Determine test suites to run based on changed files"
    )
    parser.add_argument(
        "--base-ref", default="main", help="Base branch/ref to compare against (default: main)"
    )
    parser.add_argument("--run-all", action="store_true", help="Force running all test suites")
    parser.add_argument(
        "--output-format",
        choices=["json", "json-pretty"],
        default="json",
        help="Output format (default: json)",
    )

    args = parser.parse_args()

    # --run-all is what callers fall back to when the normal invocation fails,
    # so it must not depend on git being usable: skip the diff in that case.
    changed_files = set() if args.run_all else get_changed_files(args.base_ref)
    test_suites = determine_test_suites(changed_files, run_all=args.run_all)

    # Output as JSON
    indent = 2 if args.output_format == "json-pretty" else None
    print(json.dumps(test_suites, indent=indent))

    # Exit with non-zero if no test suites selected (shouldn't happen with safety fallback)
    if not any(test_suites.values()):
        sys.exit(1)


if __name__ == "__main__":
    main()
70 changes: 69 additions & 1 deletion .github/workflows/on-pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@ on:
- '*.gif'
- '*.svg'
- '*.example'
workflow_dispatch: {}
workflow_dispatch:
inputs:
run_all_tests:
description: 'Run all integration tests regardless of changes'
required: false
default: 'false'
type: boolean

permissions: {}

Expand All @@ -34,21 +40,83 @@ jobs:
with:
python_versions_json: '["3.9"]'

determine-test-suites:
  # Computes, per job type, the JSON list of integration test suites to run
  # and exposes each list as a job output.
  name: Determine test suites
  runs-on: ubuntu-latest
  outputs:
    rest_sync_suites: ${{ steps.determine.outputs.rest_sync_suites }}
    rest_asyncio_suites: ${{ steps.determine.outputs.rest_asyncio_suites }}
    grpc_sync_suites: ${{ steps.determine.outputs.grpc_sync_suites }}
    admin_suites: ${{ steps.determine.outputs.admin_suites }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0 # Fetch full history for git diff
    - name: Determine test suites
      id: determine
      # Contexts are handed to the script through environment variables
      # instead of being expanded inside the shell source, so their values
      # are never parsed as shell code.
      env:
        RUN_ALL: ${{ github.event.inputs.run_all_tests == 'true' }}
        EVENT_NAME: ${{ github.event_name }}
        PR_BASE_REF: ${{ github.event.pull_request.base.ref }}
      run: |
        if [ "$EVENT_NAME" = "pull_request" ]; then
          base_ref="$PR_BASE_REF"
        else
          base_ref="main"
        fi

        if [ "$RUN_ALL" = "true" ]; then
          echo "Running all tests (manual override)"
          python3 .github/scripts/determine-test-suites.py --run-all --output-format json > test_suites.json
        else
          echo "Determining test suites based on changed files (base: $base_ref)"
          # Only stdout goes into the JSON file; stderr stays in the job log so
          # warnings cannot corrupt the JSON and failures remain visible.
          if ! python3 .github/scripts/determine-test-suites.py --base-ref "$base_ref" --output-format json > test_suites.json; then
            echo "Script failed, falling back to all tests"
            python3 .github/scripts/determine-test-suites.py --run-all --output-format json > test_suites.json
          fi
        fi

        # Validate JSON was created
        if [ ! -f test_suites.json ] || ! jq empty test_suites.json 2>/dev/null; then
          echo "Error: Failed to generate valid test_suites.json, falling back to all tests"
          python3 .github/scripts/determine-test-suites.py --run-all --output-format json > test_suites.json
        fi

        # Extract each job type's suites and set as outputs
        rest_sync=$(jq -c '.rest_sync' test_suites.json)
        rest_asyncio=$(jq -c '.rest_asyncio' test_suites.json)
        grpc_sync=$(jq -c '.grpc_sync' test_suites.json)
        admin=$(jq -c '.admin' test_suites.json)

        echo "rest_sync_suites=$rest_sync" >> "$GITHUB_OUTPUT"
        echo "rest_asyncio_suites=$rest_asyncio" >> "$GITHUB_OUTPUT"
        echo "grpc_sync_suites=$grpc_sync" >> "$GITHUB_OUTPUT"
        echo "admin_suites=$admin" >> "$GITHUB_OUTPUT"

        echo "Selected test suites:"
        echo "REST sync: $rest_sync"
        echo "REST asyncio: $rest_asyncio"
        echo "gRPC sync: $grpc_sync"
        echo "Admin: $admin"

# Calls the reusable project-setup workflow once unit-tests has finished,
# passing all of the caller's secrets through. The integration-tests job
# reads this job's `encrypted_project_api_key` output.
create-project:
uses: './.github/workflows/project-setup.yaml'
secrets: inherit
needs:
- unit-tests

# Runs the reusable integration-test workflow with the suite lists chosen by
# determine-test-suites.
integration-tests:
# Requires all three upstream jobs to have succeeded. NOTE(review): always()
# is presumably here so the explicit result checks decide whether the job
# runs, rather than the default skip-on-upstream-failure rule -- confirm.
if: always() && (needs.unit-tests.result == 'success' && needs.create-project.result == 'success' && needs.determine-test-suites.result == 'success')
uses: './.github/workflows/testing-integration.yaml'
secrets: inherit
needs:
- unit-tests
- create-project
- determine-test-suites
with:
encrypted_project_api_key: ${{ needs.create-project.outputs.encrypted_project_api_key }}
python_versions_json: '["3.13", "3.9"]'
# Each *_suites_json input is the JSON list produced for that job type; an
# empty string is passed when the output is missing. NOTE(review): how the
# called workflow treats an empty string is not visible here -- verify.
rest_sync_suites_json: ${{ needs.determine-test-suites.outputs.rest_sync_suites || '' }}
rest_asyncio_suites_json: ${{ needs.determine-test-suites.outputs.rest_asyncio_suites || '' }}
grpc_sync_suites_json: ${{ needs.determine-test-suites.outputs.grpc_sync_suites || '' }}
admin_suites_json: ${{ needs.determine-test-suites.outputs.admin_suites || '' }}

cleanup-project:
if: ${{ always() }}
Expand Down
Loading