|
88 | 88 | "search_context_size_high": 0.050
|
89 | 89 | }
|
90 | 90 | },
|
| 91 | + "watsonx/ibm/granite-3-8b-instruct": { |
| 92 | + "max_tokens": 8192, |
| 93 | + "max_input_tokens": 8192, |
| 94 | + "max_output_tokens": 1024, |
| 95 | + "input_cost_per_token": 0.0002, |
| 96 | + "output_cost_per_token": 0.0002, |
| 97 | + "litellm_provider": "watsonx", |
| 98 | + "mode": "chat", |
| 99 | + "supports_function_calling": true, |
| 100 | + "supports_tool_choice": true, |
| 101 | + "supports_parallel_function_calling": false, |
| 102 | + "supports_vision": false, |
| 103 | + "supports_audio_input": false, |
| 104 | + "supports_audio_output": false, |
| 105 | + "supports_prompt_caching": true, |
| 106 | + "supports_response_schema": true, |
| 107 | + "supports_system_messages": true |
| 108 | + }, |
91 | 109 | "gpt-4o-search-preview-2025-03-11": {
|
92 | 110 | "max_tokens": 16384,
|
93 | 111 | "max_input_tokens": 128000,
|
|
3303 | 3321 | "supports_response_schema": true,
|
3304 | 3322 | "supports_tool_choice": true
|
3305 | 3323 | },
|
| 3324 | + "groq/whisper-large-v3": { |
| 3325 | + "mode": "audio_transcription", |
| 3326 | + "input_cost_per_second": 0.00003083, |
| 3327 | + "output_cost_per_second": 0, |
| 3328 | + "litellm_provider": "groq" |
| 3329 | + }, |
| 3330 | + "groq/whisper-large-v3-turbo": { |
| 3331 | + "mode": "audio_transcription", |
| 3332 | + "input_cost_per_second": 0.00001111, |
| 3333 | + "output_cost_per_second": 0, |
| 3334 | + "litellm_provider": "groq" |
| 3335 | + }, |
| 3336 | + "groq/distil-whisper-large-v3-en": { |
| 3337 | + "mode": "audio_transcription", |
| 3338 | + "input_cost_per_second": 0.00000556, |
| 3339 | + "output_cost_per_second": 0, |
| 3340 | + "litellm_provider": "groq" |
| 3341 | + }, |
3306 | 3342 | "cerebras/llama3.1-8b": {
|
3307 | 3343 | "max_tokens": 128000,
|
3308 | 3344 | "max_input_tokens": 128000,
|
|
4453 | 4489 | "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
|
4454 | 4490 | "supports_tool_choice": true
|
4455 | 4491 | },
|
| 4492 | + "gemini-2.5-pro-exp-03-25": { |
| 4493 | + "max_tokens": 65536, |
| 4494 | + "max_input_tokens": 1048576, |
| 4495 | + "max_output_tokens": 65536, |
| 4496 | + "max_images_per_prompt": 3000, |
| 4497 | + "max_videos_per_prompt": 10, |
| 4498 | + "max_video_length": 1, |
| 4499 | + "max_audio_length_hours": 8.4, |
| 4500 | + "max_audio_per_prompt": 1, |
| 4501 | + "max_pdf_size_mb": 30, |
| 4502 | + "input_cost_per_image": 0, |
| 4503 | + "input_cost_per_video_per_second": 0, |
| 4504 | + "input_cost_per_audio_per_second": 0, |
| 4505 | + "input_cost_per_token": 0, |
| 4506 | + "input_cost_per_character": 0, |
| 4507 | + "input_cost_per_token_above_128k_tokens": 0, |
| 4508 | + "input_cost_per_character_above_128k_tokens": 0, |
| 4509 | + "input_cost_per_image_above_128k_tokens": 0, |
| 4510 | + "input_cost_per_video_per_second_above_128k_tokens": 0, |
| 4511 | + "input_cost_per_audio_per_second_above_128k_tokens": 0, |
| 4512 | + "output_cost_per_token": 0, |
| 4513 | + "output_cost_per_character": 0, |
| 4514 | + "output_cost_per_token_above_128k_tokens": 0, |
| 4515 | + "output_cost_per_character_above_128k_tokens": 0, |
| 4516 | + "litellm_provider": "vertex_ai-language-models", |
| 4517 | + "mode": "chat", |
| 4518 | + "supports_system_messages": true, |
| 4519 | + "supports_function_calling": true, |
| 4520 | + "supports_vision": true, |
| 4521 | + "supports_audio_input": true, |
| 4522 | + "supports_video_input": true, |
| 4523 | + "supports_pdf_input": true, |
| 4524 | + "supports_response_schema": true, |
| 4525 | + "supports_tool_choice": true, |
| 4526 | + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" |
| 4527 | + }, |
4456 | 4528 | "gemini-2.0-pro-exp-02-05": {
|
4457 | 4529 | "max_tokens": 8192,
|
4458 | 4530 | "max_input_tokens": 2097152,
|
|
4614 | 4686 | "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
4615 | 4687 | "supports_tool_choice": true
|
4616 | 4688 | },
|
| 4689 | + "gemini-2.0-flash": { |
| 4690 | + "max_tokens": 8192, |
| 4691 | + "max_input_tokens": 1048576, |
| 4692 | + "max_output_tokens": 8192, |
| 4693 | + "max_images_per_prompt": 3000, |
| 4694 | + "max_videos_per_prompt": 10, |
| 4695 | + "max_video_length": 1, |
| 4696 | + "max_audio_length_hours": 8.4, |
| 4697 | + "max_audio_per_prompt": 1, |
| 4698 | + "max_pdf_size_mb": 30, |
| 4699 | + "input_cost_per_audio_token": 0.0000007, |
| 4700 | + "input_cost_per_token": 0.0000001, |
| 4701 | + "output_cost_per_token": 0.0000004, |
| 4702 | + "litellm_provider": "vertex_ai-language-models", |
| 4703 | + "mode": "chat", |
| 4704 | + "supports_system_messages": true, |
| 4705 | + "supports_function_calling": true, |
| 4706 | + "supports_vision": true, |
| 4707 | + "supports_response_schema": true, |
| 4708 | + "supports_audio_output": true, |
| 4709 | + "supports_audio_input": true, |
| 4710 | + "supported_modalities": ["text", "image", "audio", "video"], |
| 4711 | + "supports_tool_choice": true, |
| 4712 | + "source": "https://ai.google.dev/pricing#2_0flash" |
| 4713 | + }, |
4617 | 4714 | "gemini-2.0-flash-lite": {
|
4618 | 4715 | "max_input_tokens": 1048576,
|
4619 | 4716 | "max_output_tokens": 8192,
|
|
4750 | 4847 | "supports_tool_choice": true,
|
4751 | 4848 | "source": "https://ai.google.dev/pricing#2_0flash"
|
4752 | 4849 | },
|
| 4850 | + "gemini/gemini-2.5-pro-preview-03-25": { |
| 4851 | + "max_tokens": 65536, |
| 4852 | + "max_input_tokens": 1048576, |
| 4853 | + "max_output_tokens": 65536, |
| 4854 | + "max_images_per_prompt": 3000, |
| 4855 | + "max_videos_per_prompt": 10, |
| 4856 | + "max_video_length": 1, |
| 4857 | + "max_audio_length_hours": 8.4, |
| 4858 | + "max_audio_per_prompt": 1, |
| 4859 | + "max_pdf_size_mb": 30, |
| 4860 | + "input_cost_per_audio_token": 0.0000007, |
| 4861 | + "input_cost_per_token": 0.00000125, |
| 4862 | + "input_cost_per_token_above_128k_tokens": 0.0000025, |
| 4863 | + "output_cost_per_token": 0.00001, |
| 4864 | + "output_cost_per_token_above_128k_tokens": 0.000015, |
| 4865 | + "litellm_provider": "gemini", |
| 4866 | + "mode": "chat", |
| 4867 | + "rpm": 10000, |
| 4868 | + "tpm": 10000000, |
| 4869 | + "supports_system_messages": true, |
| 4870 | + "supports_function_calling": true, |
| 4871 | + "supports_vision": true, |
| 4872 | + "supports_response_schema": true, |
| 4873 | + "supports_audio_output": false, |
| 4874 | + "supports_tool_choice": true, |
| 4875 | + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview" |
| 4876 | + }, |
4753 | 4877 | "gemini/gemini-2.0-flash-exp": {
|
4754 | 4878 | "max_tokens": 8192,
|
4755 | 4879 | "max_input_tokens": 1048576,
|
|
6568 | 6692 | "mode": "chat",
|
6569 | 6693 | "supports_tool_choice": true
|
6570 | 6694 | },
|
| 6695 | + "openrouter/mistralai/mistral-small-3.1-24b-instruct": { |
| 6696 | + "max_tokens": 32000, |
| 6697 | + "input_cost_per_token": 0.0000001, |
| 6698 | + "output_cost_per_token": 0.0000003, |
| 6699 | + "litellm_provider": "openrouter", |
| 6700 | + "mode": "chat", |
| 6701 | + "supports_tool_choice": true |
| 6702 | + }, |
6571 | 6703 | "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": {
|
6572 | 6704 | "max_tokens": 32769,
|
6573 | 6705 | "input_cost_per_token": 0.0000005,
|
|
6696 | 6828 | "supports_vision": false,
|
6697 | 6829 | "supports_tool_choice": true
|
6698 | 6830 | },
|
| 6831 | + "openrouter/openai/o3-mini": { |
| 6832 | + "max_tokens": 65536, |
| 6833 | + "max_input_tokens": 128000, |
| 6834 | + "max_output_tokens": 65536, |
| 6835 | + "input_cost_per_token": 0.0000011, |
| 6836 | + "output_cost_per_token": 0.0000044, |
| 6837 | + "litellm_provider": "openrouter", |
| 6838 | + "mode": "chat", |
| 6839 | + "supports_function_calling": true, |
| 6840 | + "supports_parallel_function_calling": true, |
| 6841 | + "supports_vision": false, |
| 6842 | + "supports_tool_choice": true |
| 6843 | + }, |
| 6844 | + "openrouter/openai/o3-mini-high": { |
| 6845 | + "max_tokens": 65536, |
| 6846 | + "max_input_tokens": 128000, |
| 6847 | + "max_output_tokens": 65536, |
| 6848 | + "input_cost_per_token": 0.0000011, |
| 6849 | + "output_cost_per_token": 0.0000044, |
| 6850 | + "litellm_provider": "openrouter", |
| 6851 | + "mode": "chat", |
| 6852 | + "supports_function_calling": true, |
| 6853 | + "supports_parallel_function_calling": true, |
| 6854 | + "supports_vision": false, |
| 6855 | + "supports_tool_choice": true |
| 6856 | + }, |
6699 | 6857 | "openrouter/openai/gpt-4o": {
|
6700 | 6858 | "max_tokens": 4096,
|
6701 | 6859 | "max_input_tokens": 128000,
|
6702 | 6860 | "max_output_tokens": 4096,
|
6703 |
| - "input_cost_per_token": 0.000005, |
6704 |
| - "output_cost_per_token": 0.000015, |
| 6861 | + "input_cost_per_token": 0.0000025, |
| 6862 | + "output_cost_per_token": 0.000010, |
6705 | 6863 | "litellm_provider": "openrouter",
|
6706 | 6864 | "mode": "chat",
|
6707 | 6865 | "supports_function_calling": true,
|
|
10189 | 10347 | "litellm_provider": "voyage",
|
10190 | 10348 | "mode": "rerank"
|
10191 | 10349 | },
|
| 10350 | + "databricks/databricks-claude-3-7-sonnet": { |
| 10351 | + "max_tokens": 200000, |
| 10352 | + "max_input_tokens": 200000, |
| 10353 | + "max_output_tokens": 128000, |
| 10354 | + "input_cost_per_token": 0.0000025, |
| 10355 | + "input_dbu_cost_per_token": 0.00003571, |
| 10356 | + "output_cost_per_token": 0.000015, |
| 10357 | + "output_dbu_cost_per_token": 0.000214286, |
| 10358 | + "litellm_provider": "databricks", |
| 10359 | + "mode": "chat", |
| 10360 | + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", |
| 10361 | + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}, |
| 10362 | + "supports_assistant_prefill": true, |
| 10363 | + "supports_function_calling": true, |
| 10364 | + "supports_tool_choice": true |
| 10365 | + }, |
10192 | 10366 | "databricks/databricks-meta-llama-3-1-405b-instruct": {
|
10193 | 10367 | "max_tokens": 128000,
|
10194 | 10368 | "max_input_tokens": 128000,
|
|
10217 | 10391 | "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
|
10218 | 10392 | "supports_tool_choice": true
|
10219 | 10393 | },
|
10220 |
| - "databricks/meta-llama-3.3-70b-instruct": { |
| 10394 | + "databricks/databricks-meta-llama-3-3-70b-instruct": { |
10221 | 10395 | "max_tokens": 128000,
|
10222 | 10396 | "max_input_tokens": 128000,
|
10223 | 10397 | "max_output_tokens": 128000,
|
|
0 commit comments