@@ -312,7 +312,7 @@ async def stream_generator():
             # Non-streaming logic
             spinner.text = f"Waiting for {model}"
             response: OpenAIChatCompletion = await client.chat.completions.create(**request_params)
-            
+
             content = response.choices[0].message.content
             spinner.succeed("Request completed")
 
@@ -385,6 +385,7 @@ async def wrapper(
     gpt_4o_mini = custom_model("gpt-4o-mini")
     o1_mini = custom_model("o1-mini")
     o1_preview = custom_model("o1-preview")
+    gpt_4_5_preview = custom_model("gpt-4.5-preview")
 
 
 class AnthropicModels:
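
Note for reviewers: the new gpt_4_5_preview attribute is just another pre-bound wrapper produced by custom_model, so call sites stay the same as for the existing models. A minimal usage sketch, assuming the wrapper is awaitable, takes a messages list, and returns a (content, error) tuple as the "return content.strip(), None" paths elsewhere in this file suggest:

    import asyncio

    async def main():
        # Hypothetical call; keyword names mirror parameters visible in this diff
        # (messages, temperature, max_tokens) but are not verified against the full file.
        content, error = await OpenAIModels.gpt_4_5_preview(
            messages=[{"role": "user", "content": "Summarize this change in one sentence."}],
            temperature=0.7,
            max_tokens=256,
        )
        print(error or content)

    asyncio.run(main())
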
@@ -687,7 +688,7 @@ async def stream_generator():
                     return compressed_content, None
                 except ValueError as e:
                     return "", e
-            
+
             # For non-JSON responses, keep original formatting but make single line
             logger.debug(f"[LLM] API Response: {' '.join(content.strip().splitlines())}")
             return content.strip(), None
@@ -727,12 +728,14 @@ async def wrapper(
     haiku_3_5 = custom_model("anthropic/claude-3.5-haiku")
     sonnet = custom_model("anthropic/claude-3-sonnet")
     sonnet_3_5 = custom_model("anthropic/claude-3.5-sonnet")
+    sonnet_3_7 = custom_model("anthropic/claude-3.7-sonnet")
     opus = custom_model("anthropic/claude-3-opus")
     gpt_3_5_turbo = custom_model("openai/gpt-3.5-turbo")
     gpt_4_turbo = custom_model("openai/gpt-4-turbo")
     gpt_4 = custom_model("openai/gpt-4")
     gpt_4o = custom_model("openai/gpt-4o")
     gpt_4o_mini = custom_model("openai/gpt-4o-mini")
+    gpt_4_5_preview = custom_model("openai/gpt-4.5-preview")
     o1_preview = custom_model("openai/o1-preview")
     o1_mini = custom_model("openai/o1-mini")
     gemini_flash_1_5 = custom_model("google/gemini-flash-1.5")
@@ -837,7 +840,7 @@ async def stream_generator():
                                     options={"temperature": temperature, "num_predict": max_tokens},
                                     stream=True,
                                 )
-                                
+
                                 for chunk in response:
                                     if chunk and "message" in chunk and "content" in chunk["message"]:
                                         content = chunk["message"]["content"]
@@ -1012,7 +1015,7 @@ async def stream_generator():
                     return compressed_content, None
                 except ValueError as e:
                     return "", e
-            
+
             # For non-JSON responses, keep original formatting but make single line
             logger.debug(f"[LLM] API Response: {' '.join(content.strip().splitlines())}")
             return content.strip(), None
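
The compressed_content assignment and the ValueError handler sit just above this hunk and are not shown here; one plausible reading, consistent with the single-line debug logging below it, is a round trip through the json module. A sketch of that idea only (not necessarily the exact helper this file uses):

    import json

    def compress_json(content: str) -> str:
        # Re-serialize so the payload becomes a single line; json.loads raises
        # json.JSONDecodeError (a ValueError subclass) on malformed input, which
        # would route execution into the except ValueError branch above.
        return json.dumps(json.loads(content), separators=(",", ":"))
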
@@ -1084,7 +1087,7 @@ async def send_together_request(
                     content = []
                     if isinstance(image_data, str):
                         image_data = [image_data]
-                    
+
                     for i, image in enumerate(image_data, start=1):
                         content.append({"type": "text", "text": f"Image {i}"})
                         if image.startswith(("http://", "https://")):
@@ -1097,7 +1100,7 @@ async def send_together_request(
                                 "type": "image_url",
                                 "image_url": {"url": f"data:image/jpeg;base64,{image}"}
                             })
-                    
+
                     # Add original text content
                     content.append({"type": "text", "text": last_user_msg["content"]})
                     last_user_msg["content"] = content
@@ -1120,19 +1123,19 @@ async def stream_generator():
                             response_format={"type": "json_object"} if require_json_output else None,
                             stream=True
                         )
-                        
+
                         for chunk in response:
                             if chunk.choices[0].delta.content:
                                 content = chunk.choices[0].delta.content
                                 full_message += content
                                 yield content
                         logger.debug("Stream complete")
                         logger.debug(f"Full message: {full_message}")
-                        yield "\n"  
+                        yield "\n"
                     except Exception as e:
                         logger.error(f"An error occurred during streaming: {e}")
                         yield ""
-                        yield "\n"  
+                        yield "\n"
 
                 return stream_generator()
 
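
Because stream_generator() is returned to the caller rather than iterated here, consumers drive it with async for. A consumption sketch, with the stream=True flag on the wrapper assumed from the streaming/non-streaming split in this file:

    import asyncio

    async def consume():
        # Hypothetical call: stream=True is assumed to make the wrapper return
        # the async generator built by stream_generator() instead of a tuple.
        stream = await OpenAIModels.gpt_4o_mini(
            messages=[{"role": "user", "content": "Stream a short haiku."}],
            stream=True,
        )
        async for token in stream:
            print(token, end="", flush=True)
        print()

    asyncio.run(consume())
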
@@ -1148,7 +1151,7 @@ async def stream_generator():
 
             content = response.choices[0].message.content
             spinner.succeed("Request completed")
-            
+
             # Compress the response to single line if it's JSON
             if require_json_output:
                 try:
@@ -1158,7 +1161,7 @@ async def stream_generator():
                     return compressed_content, None
                 except ValueError as e:
                     return "", e
-            
+
             # For non-JSON responses, keep original formatting but make single line
             logger.debug(f"[LLM] API Response: {' '.join(content.strip().splitlines())}")
             return content.strip(), None
@@ -1216,7 +1219,7 @@ async def send_gemini_request(
         """
         # Create spinner only once at the start
         spinner = Halo(text=f"Sending request to Gemini ({model})", spinner="dots")
-        
+
         try:
             # Start spinner
             spinner.start()
@@ -1229,7 +1232,7 @@ async def send_gemini_request(
                 "temperature": temperature,
                 "max_output_tokens": max_tokens,
             }
-            
+
             if require_json_output:
                 generation_config.update({
                     "response_mime_type": "application/json"
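
For context, response_mime_type is how google-generativeai requests JSON output. A standalone sketch of an equivalent configuration (model name, prompt, and values are placeholders, not taken from this file):

    import google.generativeai as genai

    genai.configure(api_key="...")
    model = genai.GenerativeModel(
        "gemini-1.5-flash",
        generation_config={
            "temperature": 0.2,
            "max_output_tokens": 512,
            "response_mime_type": "application/json",  # same switch shown in the context lines above
        },
    )
    response = model.generate_content("Return the JSON object {\"ok\": true}.")
    print(response.text)
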
@@ -1247,15 +1250,15 @@ async def send_gemini_request(
                 last_user_message = next((msg["content"] for msg in reversed(messages) if msg["role"] == "user"), "")
                 full_message = ""
                 logger.debug("Stream started")
-                
+
                 try:
                     response = model_instance.generate_content(last_user_message, stream=True)
                     for chunk in response:
                         if chunk.text:
                             content = chunk.text
                             full_message += content
                             yield content
-                    
+
                     logger.debug("Stream complete")
                     logger.debug(f"Full message: {full_message}")
                 except Exception as e:
@@ -1264,13 +1267,13 @@ async def send_gemini_request(
             else:
                 # Non-streaming: Use chat format
                 chat = model_instance.start_chat(history=[])
-                
+
                 # Process messages and images
                 if messages:
                     for msg in messages:
                         role = msg["role"]
                         content = msg["content"]
-                        
+
                         if role == "user":
                             if image_data and msg == messages[-1]:
                                 parts = []
@@ -1508,7 +1511,7 @@ async def stream_generator():
             # Non-streaming logic
             spinner.text = f"Waiting for {model}"
             response = await client.chat.completions.create(**request_params)
-            
+
             if model == "deepseek-reasoner":
                 reasoning = response.choices[0].message.reasoning_content
                 content = response.choices[0].message.content
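
The deepseek-reasoner branch reads both reasoning_content and content off the message; how the two are combined is outside this hunk. A standalone sketch of the same access pattern against DeepSeek's OpenAI-compatible endpoint (the base URL, key handling, and the combined return format are assumptions):

    from openai import AsyncOpenAI

    async def ask_reasoner(prompt: str) -> str:
        client = AsyncOpenAI(base_url="https://api.deepseek.com", api_key="...")
        response = await client.chat.completions.create(
            model="deepseek-reasoner",
            messages=[{"role": "user", "content": prompt}],
        )
        # Same attribute access as the diff: chain-of-thought and final answer
        # arrive as separate fields on the message.
        reasoning = response.choices[0].message.reasoning_content
        content = response.choices[0].message.content
        return f"<reasoning>{reasoning}</reasoning>\n{content}"
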
@@ -1564,7 +1567,7 @@ async def wrapper(
             )
 
         return wrapper
-    
+
     # Model-specific methods using custom_model
     chat = custom_model("deepseek-chat")
     reasoner = custom_model("deepseek-reasoner")