diff --git a/examples/core.py b/examples/core.py
index cb62dd6d..583d52be 100644
--- a/examples/core.py
+++ b/examples/core.py
@@ -113,7 +113,7 @@ async def async_stream():
     return latencies

 def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens: int=1000):
-    if model.startswith(('o1', 'o3')):
+    if model.startswith(('o1', 'o3', 'o4')):
         chat_request = {
             "chat_input": chat_input,
             "model": model,
@@ -156,7 +156,7 @@ def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, *

 def run_chat_all_providers():
     # OpenAI
     multiple_provider_runs(provider="openai", model="gpt-4o-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
-    multiple_provider_runs(provider="openai", model="o3-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
+    multiple_provider_runs(provider="openai", model="o4-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)

     #multiple_provider_runs(provider="openai", model="o1-preview", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
diff --git a/libs/core/llmstudio_core/config.yaml b/libs/core/llmstudio_core/config.yaml
index 46813bd6..405c270d 100644
--- a/libs/core/llmstudio_core/config.yaml
+++ b/libs/core/llmstudio_core/config.yaml
@@ -238,6 +238,12 @@ providers:
         input_token_cost: 0.0000011
         cached_token_cost: 0.00000055
         output_token_cost: 0.0000044
+      o4-mini:
+        mode: chat
+        max_completion_tokens: 200000
+        input_token_cost: 0.0000011
+        cached_token_cost: 0.000000275
+        output_token_cost: 0.0000044
       gpt-4o-mini:
         mode: chat
         max_tokens: 128000
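
A minimal sketch of how the updated branch would be exercised, for review context only. It assumes build_chat_request keeps the request shape hinted at in the hunk above; fields other than chat_input and model (is_stream, parameters) are illustrative assumptions, not taken from this diff.

# Sketch only: the dict fields beyond chat_input/model are assumed for illustration.
def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens: int = 1000):
    if model.startswith(("o1", "o3", "o4")):
        # Reasoning-model families (o1/o3/o4) take max_completion_tokens instead of max_tokens.
        return {
            "chat_input": chat_input,
            "model": model,
            "is_stream": is_stream,  # assumed field
            "parameters": {"max_completion_tokens": max_tokens},  # assumed field
        }
    return {
        "chat_input": chat_input,
        "model": model,
        "is_stream": is_stream,  # assumed field
        "parameters": {"temperature": 0, "max_tokens": max_tokens},  # assumed field
    }

# With this change, o4-mini routes through the reasoning-model branch.
print(build_chat_request("o4-mini", "Hello!", is_stream=False)["parameters"])
# -> {'max_completion_tokens': 1000}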