BerriAI · TeddyAmkie · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025 · Aug 26, 2025
diff --git a/tests/openai_endpoints_tests/azure_endpoints_tests/E2E_Testing_Q4_2025_Roadmap.md b/tests/openai_endpoints_tests/azure_endpoints_tests/E2E_Testing_Q4_2025_Roadmap.md
@@ -0,0 +1,142 @@
+# E2E Testing Roadmap (Q4 2025): Azure OpenAI and Additional Providers
+
+## Phase 1 - Core Provider Testing
+
+| Provider / Host    | Model(s)                  | Status              |
+|:-------------------|:--------------------------|:--------------------|
+| **Azure OpenAI**   | OpenAI models             | ✅ **COMPLETED**    |
+
+| Endpoints          | Description (Regression focus) | Status         |
+|:-------------------|:--------------------------|:--------------------|
+| `/messages`        | Schema validity           | ✅ 6/6 tests passed |
+| `/chat/completions`| Usage/cost attribution    | ✅ 13/13 tests passed |
+| `/responses`       | Logging/traces            | ✅ 5/5 tests passed |
+| `/batches`         | Streaming                 | ✅ 6/6 tests passed |
+|                    | Error handling            | ✅ Covered          |
+|                    | Batch lifecycle           | ✅ Covered          |
+
+| Provider / Host    | Model(s)                  | Status              |
+|:-------------------|:--------------------------|:--------------------|
+| **Vertex Claude**  | Claude via Anthropic API  | 🔄 **PENDING**      |
+
+| Endpoints          | Description (Regression focus) | Status              |
+|:-------------------|:--------------------------|:--------------------|
+| `/messages`        | Schema validity           |                     |
+| `/chat/completions`| Usage/cost attribution    |                     |
+| `/responses`       | Logging/traces            |                     |
+| `/batches`         | Streaming                 |                     |
+|                    | Error handling            |                     |
+|                    | Batch lifecycle           |                     |
+
+| Provider / Host    | Model(s)                |
+|:-------------------|:------------------------|
+| **Vertex Gemini**  | Gemini                  |
+
+| Endpoints         | Description (Regression focus) |
+|-------------------|--------------------------|
+| `/messages`       | Schema validity          |
+| `/chat/completions` | Usage/cost attribution  |
+| `/responses`      | Logging/traces           |
+| `/batches`        | Streaming                |
+|                   | Error handling           |
+|                   | Batch lifecycle          |
+
+| Provider / Host    | Model(s)                |
+|:-------------------|:------------------------|
+| **Deepseek**       | -                       |
+
+| Endpoints         | Description (Regression focus) |
+|-------------------|--------------------------|
+| `/messages`       | Schema validity          |
+| `/chat/completions` | Usage/cost attribution  |
+| `/responses`      | Logging/traces           |
+| `/batches`        | Streaming                |
+|                   | Error handling           |
+|                   | Batch lifecycle          |
+
+| Provider / Host    | Model(s)                |
+|:-------------------|:------------------------|
+| **Mistral**        | -                       |
+
+| Endpoints         | Description (Regression focus) |
+|-------------------|--------------------------|
+| `/messages`       | Schema validity          |
+| `/chat/completions` | Usage/cost attribution  |
+| `/responses`      | Logging/traces           |
+| `/batches`        | Streaming                |
+|                   | Error handling           |
+|                   | Batch lifecycle          |
+
+| Provider / Host    | Model(s)                |
+|:-------------------|:------------------------|
+| **On-Prem**        | self-hosted,            |
+|                   | vLLM/Ollama             |
+
+| Endpoints         | Description (Regression focus) |
+|-------------------|--------------------------|
+| `/messages`       | Schema validity          |
+| `/chat/completions` | Usage/cost attribution  |
+| `/responses`      | Logging/traces           |
+| `/batches`        | Streaming                |
+|                   | Error handling           |
+|                   | Batch lifecycle          |
+
+---
+
+## Phase 2 (Future / Nice-to-Haves)
+
+| Category                      | Features / Examples               |
+|-------------------------------|-----------------------------------|
+| Adjacent Features             | Session management                |
+| Adjacent Features             | Passthrough endpoints             |
+| Adjacent Features             | Transformations transparency (req + resp) |
+| Adjacent Features             | Managed files                     |
+| Adjacent Features             | Benchmarking overhead             |
+| Adjacent Features             | SSO                               |
+| OpenAI Python SDK Examples    | Responses                         |
+| OpenAI Python SDK Examples    | Chat Completions                  |
+| OpenAI Python SDK Examples    | Vision                            |
+| OpenAI Python SDK Examples    | Audio STT/TTS                     |
+| OpenAI Python SDK Examples    | Files API                         |
+| OpenAI Python SDK Examples    | Embeddings                        |
+| OpenAI Python SDK Examples    | Batches                           |
+| OpenAI Python SDK Examples    | Retries/Pagination/Timeouts       |
+| OpenAI Python SDK Examples    | AzureOpenAI client                |
+| OpenAI Python SDK Examples    | Realtime WS demo                  |
+| OpenAI Agents Python Examples | Basic agent                       |
+| OpenAI Agents Python Examples | Agent patterns                    |
+| OpenAI Agents Python Examples | Handoffs                          |
+| OpenAI Agents Python Examples | Tools integration                 |
+| OpenAI Agents Python Examples | MCP integration                   |
+| OpenAI Agents Python Examples | Sessions & tracing                |
+| OpenAI Agents Python Examples | Voice/Realtime agent              |
+| OpenAI Agents Python Examples | End-to-end samples                |
+
+## 🔧 Test Implementation Notes
+
+### Schema Validation
+- 🎯 **Exact key presence and types;** usage must be numeric
+- ✅ **finish_reason** in allowed set: `{stop, length, tool_calls, content_filter}`
+
+### Streaming
+- 📊 **Verify event order** and final aggregation equals non-stream content
+- 🔄 **Delta accumulation** produces identical final result
+
+### Retries & Reliability  
+- ⏰ **Bounded exponential backoff** with jitter for 429/5xx
+- 🔑 **Idempotency:** Include `X-Idempotency-Key` in POST; assert dedupe
+
+### Observability
+- 📊 **Assert DB rows,** request/response transformation logs (both directions)
+- 🔍 **Langfuse trace spans** (including tool steps)
+- 🔒 **Redaction:** Ensure secrets removed from stored payloads
+
+### Performance
+- ⚡ **First token latency** threshold
+- ⏱️ **Batch completes** within window
+
+### Test Fixtures
+- 🎯 **Deterministic prompts** (temperature:0)
+- 📝 **Small golden outputs**
+- 📋 **JSONL files** (valid + one bad row)
+- 🖼️ **Mock images** (data URLs)
diff --git a/tests/openai_endpoints_tests/azure_endpoints_tests/README.md b/tests/openai_endpoints_tests/azure_endpoints_tests/README.md
@@ -0,0 +1,61 @@
+# Azure OpenAI E2E Testing
+
+End-to-end tests for Azure OpenAI endpoints via LiteLLM proxy covering batches, chat completions, messages, and responses APIs.
+
+## 🚀 Setup
+
+### 1. Environment Variables
+
+Create `.env.test` in repository root:
+
+```bash
+AZURE_API_BASE=https://your-resource.openai.azure.com/
+AZURE_API_KEY=your-azure-api-key-here
+AZURE_API_MODEL="o4-mini" # Ensure model works with Responses API
+
+```
+
+### 2. Start LiteLLM Proxy
+
+```bash
+litellm --config azure_testing_config.yaml --port 4000
+```
+
+### 3. Install Dependencies
+
+```bash
+pip install -r requirements.txt
+pip install pytest requests python-dotenv
+```
+
+## 🏃‍♂️ Running Tests
+
+```bash
+# Run all tests (~5-6 minutes)
+pytest -v .
+
+# Run specific test files
+pytest -v test_e2e_azure_batches.py
+pytest -v test_e2e_azure_chat_completions.py
+```
+
+## ⏱️ Test Duration
+
+- **Full suite**: ~5-6 minutes  
+- **Batch E2E**: ~70-90 seconds (times out at 5 minutes)
+
+## 🔧 Authentication
+
+Tests use `Bearer sk-1234` which matches the `master_key` in `azure_testing_config.yaml`. The proxy forwards requests to Azure using your `AZURE_API_KEY`.
+
+## 🐛 Troubleshooting
+
+**Proxy not running?**
+```bash
+curl http://localhost:4000/health
+```
+
+**Azure auth issues?**
+```bash
+curl -H "api-key: $AZURE_API_KEY" "${AZURE_API_BASE%/}/openai/deployments/gpt-4.1/chat/completions?api-version=2024-10-01-preview"
+```
diff --git a/tests/openai_endpoints_tests/azure_endpoints_tests/azure_testing_config.yaml b/tests/openai_endpoints_tests/azure_endpoints_tests/azure_testing_config.yaml
@@ -0,0 +1,76 @@
+model_list:
+  # Main Azure model for testing - named azure-o4-mini but routed to the GPT-4.1 deployment
+  - model_name: azure-o4-mini
+    litellm_params:
+      model: azure/gpt-4.1
+      api_base: os.environ/AZURE_API_BASE
+      api_version: "2024-10-01-preview"
+      api_key: os.environ/AZURE_API_KEY
+    model_info:
+      base_model: gpt-4.1
+
+  # Alias for test compatibility - gpt-4 (maps to gpt-4.1 deployment)
+  - model_name: gpt-4
+    litellm_params:
+      model: azure/gpt-4.1
+      api_base: os.environ/AZURE_API_BASE
+      api_version: "2024-10-01-preview"
+      api_key: os.environ/AZURE_API_KEY
+    model_info:
+      base_model: gpt-4.1
+
+  # Additional alias for gpt-4o (maps to gpt-4.1 deployment)
+  - model_name: gpt-4o
+    litellm_params:
+      model: azure/gpt-4.1
+      api_base: os.environ/AZURE_API_BASE
+      api_version: "2024-10-01-preview"
+      api_key: os.environ/AZURE_API_KEY
+    model_info:
+      base_model: gpt-4.1
+
+  # Direct alias for gpt-4o-mini model name (maps to gpt-4.1 deployment)
+  - model_name: gpt-4o-mini
+    litellm_params:
+      model: azure/gpt-4.1
+      api_base: os.environ/AZURE_API_BASE
+      api_version: "2024-10-01-preview"
+      api_key: os.environ/AZURE_API_KEY
+    model_info:
+      base_model: gpt-4.1
+
+  # GPT-4.1 alias for direct use
+  - model_name: gpt-4.1
+    litellm_params:
+      model: azure/gpt-4.1
+      api_base: os.environ/AZURE_API_BASE
+      api_version: "2024-10-01-preview"
+      api_key: os.environ/AZURE_API_KEY
+    model_info:
+      base_model: gpt-4.1
+
+  # O4-mini model for responses API ONLY
+  - model_name: o4-mini
+    litellm_params:
+      model: azure/o4-mini
+      api_base: os.environ/AZURE_API_BASE
+      api_version: "2025-03-01-preview"
+      api_key: os.environ/AZURE_API_KEY
+    model_info:
+      base_model: o4-mini
+
+# General settings
+general_settings:
+  master_key: sk-1234
+  # No database required for E2E testing
+
+# Files API settings for batch operations
+files_settings:
+  - upload_type: "local"  # Store files locally for testing
+    local_dir: "/tmp/litellm_files"  # Temporary directory for file storage
+
+litellm_settings:
+  drop_params: true
+  set_verbose: false
+  request_timeout: 600
+  telemetry: false