diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 00000000..c8c34c39
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,102 @@
+name: Tests
+
+on:
+  pull_request:
+    branches:
+      - main
+      - develop
+    types:
+      - opened
+      - synchronize
+      - reopened
+
+jobs:
+  unit-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      # Set up Python environment
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ">=3.9 <3.13"
+
+      # Install Poetry
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+
+      # Cache Poetry Dependencies
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pypoetry
+          key: poetry-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            poetry-${{ runner.os }}-
+
+      # Install lib and dev dependencies
+      - name: Install llmstudio-core
+        working-directory: ./libs/core
+        run: |
+          poetry install
+          UNIT_ENV=$(poetry env info --path)
+          echo $UNIT_ENV
+          echo "UNIT_ENV=$UNIT_ENV" >> $GITHUB_ENV
+
+      - name: Run unit tests
+        run: |
+          echo ${{ env.UNIT_ENV }}
+          source ${{ env.UNIT_ENV }}/bin/activate
+          poetry run pytest libs/core
+
+  integration-tests:
+    needs: unit-tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      # Set up Python environment
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ">=3.9 <3.13"
+
+      # Install Poetry
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+
+      # Inject Secrets as Environment Variables
+      - name: Set up environment variables
+        run: |
+          echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> $GITHUB_ENV
+          echo "GOOGLE_API_KEY=${{ secrets.GOOGLE_API_KEY }}" >> $GITHUB_ENV
+          echo "BEDROCK_ACCESS_KEY=${{ secrets.BEDROCK_ACCESS_KEY }}" >> $GITHUB_ENV
+          echo "BEDROCK_SECRET_KEY=${{ secrets.BEDROCK_SECRET_KEY }}" >> $GITHUB_ENV
+          echo "BEDROCK_REGION=${{ secrets.BEDROCK_REGION }}" >> $GITHUB_ENV
+
+      # Cache Poetry Dependencies
+      - name: Cache Poetry dependencies (Integration Tests)
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pypoetry
+          key: poetry-integration-${{ runner.os }}-${{ hashFiles('libs/llmstudio/poetry.lock') }}
+          restore-keys: |
+            poetry-integration-${{ runner.os }}-
+
+      # Install llmstudio
+      - name: Install llmstudio
+        working-directory: ./libs/llmstudio
+        run: |
+          poetry install
+          INTEGRATION_ENV=$(poetry env info --path)
+          echo $INTEGRATION_ENV
+          echo "INTEGRATION_ENV=$INTEGRATION_ENV" >> $GITHUB_ENV
+
+      # Run Integration Tests
+      - name: Run Integration Tests
+        run: |
+          source ${{ env.INTEGRATION_ENV }}/bin/activate
+          poetry run pytest libs/llmstudio/tests/integration_tests
\ No newline at end of file
diff --git a/.github/workflows/upload-pypi-dev.yml b/.github/workflows/upload-pypi-dev.yml
index b13bf403..85cf886c 100644
--- a/.github/workflows/upload-pypi-dev.yml
+++ b/.github/workflows/upload-pypi-dev.yml
@@ -1,4 +1,4 @@
-name: PyPI prerelease and build/push Docker image.
+name: PyPI prerelease any module.
 
 on:
   workflow_dispatch:
diff --git a/.gitignore b/.gitignore
index 19015866..d80305ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,6 +56,7 @@ env3
 .env*
 .env*.local
 .venv*
+*venv*
 env*/
 venv*/
 ENV/
@@ -66,6 +67,7 @@ venv.bak/
 config.yaml
 
 bun.lockb
+
 # Jupyter Notebook
 .ipynb_checkpoints
 
@@ -76,4 +78,4 @@ bun.lockb
 llmstudio/llm_engine/logs/execution_logs.jsonl
 *.db
 .prettierignore
-db
\ No newline at end of file
+db
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6ee1c7ad..a637756c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -9,9 +9,30 @@ Thank you for expressing your interest in contributing to LLMstudio. To ensure t
 3. Follow our repo guidelines
    - Ensure that you update any relevant docstrings and comments within your code
    - Run `pre-commit run --all-files` to lint your code
+4. Sign your commits. Without signed commits, your changes will not be accepted into `main`.
 
 ## Branches
 
 - All development happens in per-feature branches prefixed by contributor's initials. For example `feat/feature_name`.
 - Approved PRs are merged to the `main` branch.
+
+## Alpha releases
+
+To publish a new alpha version of any library (`llmstudio`, `llmstudio-proxy`, `llmstudio-tracker`), your changes must first be in the `develop` branch. Make sure your feature branch is reviewed and working before merging it into `develop`.
+
+Process:
+- Ensure the `feature/**` branch you worked on passes the tests and has the necessary approvals.
+- Merge it into `develop`.
+- Confirm the changes landed in the `develop` branch.
+- Use GitHub Actions to initiate the pre-release process: [PyPI pre-release any module](https://github.com/TensorOpsAI/LLMstudio/actions/workflows/upload-pypi-dev.yml)
+- Select the target library (`llmstudio`, `llmstudio-proxy`, `llmstudio-tracker`) and the target version for the final release (e.g., 1.1.0). Consult the `main` branch and PyPI for the current versions.
+- Run the workflow.
+- The workflow automatically bumps the version and creates an alpha release of the selected library/module.
+- The workflow automatically pushes the version bump back to the `develop` branch.
+
+Repeat the process if your `develop` branch contains changes to multiple libraries.
+
+## Final releases
+
+Once you are happy with the versions, write the release notes on the PR between `develop` and `main` and merge it into `main` when ready for the full release. The workflow will automatically remove the `alpha` tag from each library and push the final versions for every library/module that changed.
+
+
diff --git a/Makefile b/Makefile
index 5a43a3e6..6c957607 100644
--- a/Makefile
+++ b/Makefile
@@ -1,2 +1,5 @@
 format:
 	pre-commit run --all-files
+
+unit-tests:
+	pytest libs/core/tests/unit_tests
diff --git a/examples/_config.yaml b/examples/_config.yaml
index ae82394b..12f84a16 100644
--- a/examples/_config.yaml
+++ b/examples/_config.yaml
@@ -115,16 +115,29 @@ providers:
     keys:
       - OPENAI_API_KEY
     models:
+      o1-preview:
+        mode: chat
+        max_completion_tokens: 128000
+        input_token_cost: 0.000015
+        output_token_cost: 0.000060
+      o1-mini:
+        mode: chat
+        max_completion_tokens: 128000
+        input_token_cost: 0.000003
+        cached_token_cost: 0.0000015
+        output_token_cost: 0.000012
       gpt-4o-mini:
        mode: chat
        max_tokens: 128000
        input_token_cost: 0.00000015
+        cached_token_cost: 0.000000075
        output_token_cost: 0.00000060
      gpt-4o:
        mode: chat
        max_tokens: 128000
-        input_token_cost: 0.000005
-        output_token_cost: 0.000015
+        input_token_cost: 0.0000025
+        cached_token_cost: 0.00000125
+        output_token_cost: 0.00001
      gpt-4-turbo:
        mode: chat
        max_tokens: 128000
diff --git a/examples/core.py b/examples/core.py
index 6cae128a..61c7debe 100644
--- a/examples/core.py
+++ b/examples/core.py
@@ -4,24 +4,61 @@ from pprint import pprint
 
 import os
+import asyncio
 from dotenv import load_dotenv
 
 load_dotenv()
 
 
-def run_provider(provider, model, api_key, **kwargs):
+def run_provider(provider, model, api_key=None, **kwargs):
+    print(f"\n\n###RUNNING for <{provider}>, <{model}> ###")
     llm = LLMCore(provider=provider, api_key=api_key, **kwargs)
 
     latencies = {}
-    chat_request = build_chat_request(model, chat_input="Hello, my name is Jason Json", is_stream=False)
 
-    import asyncio
+    print("\nAsync Non-Stream")
+    chat_request = build_chat_request(model, chat_input="Hello, my name is Jason", is_stream=False)
+    string = """
+What is Lorem Ipsum?
+Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
+
+Why do we use it?
+It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
+
+
+Where does it come from?
+Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.
+
+What is Lorem Ipsum?
+Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
+
+Why do we use it?
+It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
+
+
+Where does it come from?
+Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.
+
+What is Lorem Ipsum?
+Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
+
+Why do we use it?
+It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
+
+
+Where does it come from?
+Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.
+
+    """
+    #chat_request = build_chat_request(model, chat_input=string, is_stream=False)
+
+
     response_async = asyncio.run(llm.achat(**chat_request))
     pprint(response_async)
     latencies["async (ms)"]= response_async.metrics["latency_s"]*1000
-
-    # stream
-    print("\nasync stream")
+
+
+    print("\nAsync Stream")
     async def async_stream():
-        chat_request = build_chat_request(model, chat_input="Hello, my name is Tom Json", is_stream=True)
+        chat_request = build_chat_request(model, chat_input="Hello, my name is Tom", is_stream=True)
 
         response_async = await llm.achat(**chat_request)
         async for p in response_async:
@@ -36,15 +73,16 @@ async def async_stream():
 
     asyncio.run(async_stream())
 
-    print("# Now sync calls")
-    chat_request = build_chat_request(model, chat_input="Hello, my name is Alice Json", is_stream=False)
+    print("\nSync Non-Stream")
+    chat_request = build_chat_request(model, chat_input="Hello, my name is Alice", is_stream=False)
 
     response_sync = llm.chat(**chat_request)
     pprint(response_sync)
     latencies["sync (ms)"]= response_sync.metrics["latency_s"]*1000
+
 
-    print("# Now sync calls streaming")
-    chat_request = build_chat_request(model, chat_input="Hello, my name is Mary Json", is_stream=True)
+    print("\nSync Stream")
+    chat_request = build_chat_request(model, chat_input="Hello, my name is Mary", is_stream=True)
 
     response_sync_stream = llm.chat(**chat_request)
     for p in response_sync_stream:
@@ -59,7 +97,7 @@ async def async_stream():
     return latencies
 
 def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens: int=1000):
-    if model == "o1-preview" or model == "o1-mini":
+    if model.startswith(('o1', 'o3')):
         chat_request = {
             "chat_input": chat_input,
             "model": model,
@@ -69,6 +107,16 @@ def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens:
                 "max_completion_tokens": max_tokens
             }
         }
+    elif 'amazon.nova' in model or 'anthropic.claude' in model:
+        chat_request = {
+            "chat_input": chat_input,
+            "model": model,
+            "is_stream": is_stream,
+            "retries": 0,
+            "parameters": {
+                "maxTokens": max_tokens
+            }
+        }
     else:
         chat_request = {
             "chat_input": chat_input,
@@ -78,83 +126,86 @@ def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens:
             "parameters": {
                 "temperature": 0,
                 "max_tokens": max_tokens,
-                "response_format": {"type": "json_object"},
                 "functions": None,
             }
         }
 
    return chat_request
 
 
+def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, **kwargs):
+    for _ in range(num_runs):
+        latencies = run_provider(provider=provider, model=model, api_key=api_key, **kwargs)
+        pprint(latencies)
+
+def run_chat_all_providers():
+    # OpenAI
+ multiple_provider_runs(provider="openai", model="gpt-4o-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1) + multiple_provider_runs(provider="openai", model="o3-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1) + #multiple_provider_runs(provider="openai", model="o1-preview", api_key=os.environ["OPENAI_API_KEY"], num_runs=1) + # Azure + multiple_provider_runs(provider="azure", model="gpt-4o-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="gpt-4o", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -provider = "openai" -model = "gpt-4o-mini" -for _ in range(1): - latencies = run_provider(provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"]) - pprint(latencies) + #multiple_provider_runs(provider="anthropic", model="claude-3-opus-20240229", num_runs=1, api_key=os.environ["ANTHROPIC_API_KEY"]) + + #multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + + + multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"]) + + # Bedrock + multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) + #multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) + +run_chat_all_providers() + + +import base64 + +def messages(img_path): + """ + Creates a message payload with both text and image. + Adapts format based on the provider. 
+ """ + with open(img_path, "rb") as f: + image_bytes = f.read() + + base64_image = base64.b64encode(image_bytes).decode("utf-8") + return [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}, + }, + { + "type": "image_url", + "image_url": {"url": "https://awsmp-logos.s3.amazonaws.com/seller-zx4pk43qpmxoa/53d235806f343cec94aac3c577d81c13.png"}, + }, + ], + } + ] + +def run_send_imgs(): + provider="bedrock" + model="us.amazon.nova-lite-v1:0" + chat_input=messages(img_path="./libs/llmstudio/tests/integration_tests/test_data/llmstudio-logo.jpeg") + chat_request = build_chat_request(model=model, chat_input=chat_input, is_stream=False) + llm = LLMCore(provider=provider, api_key=os.environ["OPENAI_API_KEY"], region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) + response_sync = llm.chat(**chat_request) + #print(response_sync) + response_sync.clean_print() -provider = "openai" -model = "o1-preview" -for _ in range(1): - latencies = run_provider(provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"]) - pprint(latencies) - -provider = "openai" -model = "o1-mini" -for _ in range(1): - latencies = run_provider(provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"]) - pprint(latencies) - -# provider = "anthropic" -# model = "claude-3-opus-20240229" -# for _ in range(1): -# latencies = run_provider(provider=provider, model=model, api_key=os.environ["ANTHROPIC_API_KEY"]) -# pprint(latencies) -# # we need credits - -provider = "azure" -model = "gpt-4o-mini" -for _ in range(1): - latencies = run_provider(provider=provider, model=model, - api_key=os.environ["AZURE_API_KEY"], - api_version=os.environ["AZURE_API_VERSION"], - api_endpoint=os.environ["AZURE_API_ENDPOINT"]) - pprint(latencies) - -provider = "azure" -model = "o1-preview" -for _ in range(1): - latencies = run_provider(provider=provider, model=model, - api_key=os.environ["AZURE_API_KEY"], - api_version=os.environ["AZURE_API_VERSION"], - api_endpoint=os.environ["AZURE_API_ENDPOINT"]) - pprint(latencies) + #for p in response_sync: + # if p.metrics: + # p.clean_print() -provider = "azure" -model = "o1-mini" -for _ in range(1): - latencies = run_provider(provider=provider, model=model, - api_key=os.environ["AZURE_API_KEY"], - api_version=os.environ["AZURE_API_VERSION"], - api_endpoint=os.environ["AZURE_API_ENDPOINT"]) - pprint(latencies) - -# provider = "azure" -# model = "gpt-4o" -# for _ in range(1): -# latencies = run_provider(provider=provider, model=model, -# api_key=os.environ["AZURE_API_KEY_llama"], -# base_url=os.environ["AZURE_BASE_URL"] -# ) -# pprint(latencies) - - -# provider = "vertexai" -# model = "gemini-1.5-pro-latest" -# for _ in range(1): -# latencies = run_provider(provider=provider, model=model, -# api_key=os.environ["GOOGLE_API_KEY"], -# ) -# pprint(latencies) +run_send_imgs() \ No newline at end of file diff --git a/examples/langchain_integration.py b/examples/langchain_integration.py index 4d408535..51940624 100644 --- a/examples/langchain_integration.py +++ b/examples/langchain_integration.py @@ -18,7 +18,12 @@ # %% from langchain.tools import tool -from langchain.agents import AgentType, initialize_agent +from langchain.agents import AgentType, initialize_agent, AgentExecutor + +from langchain.agents.openai_functions_agent.base import ( + create_openai_functions_agent, +) +from langchain import hub 
# # %% # print("\n", chat_llm.invoke('Hello')) @@ -192,9 +197,16 @@ def assistant(question: str)->str: tools = [power_disco_ball, start_music, dim_lights] print(tools) - #rebuild agent with new tools - agent_executor = initialize_agent( - tools, chat_llm, agent=AgentType.OPENAI_FUNCTIONS, verbose = True, debug = True + #rebuild agent with new tools - This is the old outdated way of using agents in langchain + #agent_executor = initialize_agent( + # tools, chat_llm, agent=AgentType.OPENAI_FUNCTIONS, verbose = True, debug = True + #) + prompt = hub.pull("hwchase17/openai-functions-agent") + + agent = create_openai_functions_agent(llm=chat_llm, tools=tools, prompt=prompt) + + agent_executor = AgentExecutor( + agent=agent, tools=tools, verbose=True, return_intermediate_steps=True ) response = agent_executor.invoke( diff --git a/libs/core/llmstudio_core/config.yaml b/libs/core/llmstudio_core/config.yaml index 5f0920b2..46813bd6 100644 --- a/libs/core/llmstudio_core/config.yaml +++ b/libs/core/llmstudio_core/config.yaml @@ -73,7 +73,7 @@ providers: step: 1 bedrock: id: bedrock - name: Bedrock + name: Bedrock ConverseAPI chat: true embed: true keys: @@ -126,6 +126,22 @@ providers: max_tokens: 100000 input_token_cost: 0.000008 output_token_cost: 0.000024 + us.amazon.nova-pro-v1:0: + mode: chat + max_tokens: 300000 + input_token_cost: 0.0000008 + output_token_cost: 0.0000016 + us.amazon.nova-lite-v1:0: + mode: chat + max_tokens: 300000 + input_token_cost: 0.00000006 + output_token_cost: 0.00000012 + us.amazon.nova-micro-v1:0: + mode: chat + max_tokens: 128000 + input_token_cost: 0.000000035 + output_token_cost: 0.00000007 + parameters: temperature: name: "Temperature" @@ -208,22 +224,32 @@ providers: mode: chat max_completion_tokens: 128000 input_token_cost: 0.000015 + cached_token_cost: 0.0000075 output_token_cost: 0.000060 o1-mini: mode: chat max_completion_tokens: 128000 - input_token_cost: 0.000003 - output_token_cost: 0.000012 + input_token_cost: 0.0000011 + cached_token_cost: 0.00000055 + output_token_cost: 0.0000044 + o3-mini: + mode: chat + max_completion_tokens: 200000 + input_token_cost: 0.0000011 + cached_token_cost: 0.00000055 + output_token_cost: 0.0000044 gpt-4o-mini: mode: chat max_tokens: 128000 input_token_cost: 0.00000015 + cached_token_cost: 0.000000075 output_token_cost: 0.00000060 gpt-4o: mode: chat max_tokens: 128000 - input_token_cost: 0.000005 - output_token_cost: 0.000015 + input_token_cost: 0.0000025 + cached_token_cost: 0.00000125 + output_token_cost: 0.00001 gpt-4-turbo: mode: chat max_tokens: 128000 @@ -294,11 +320,13 @@ providers: mode: chat max_completion_tokens: 128000 input_token_cost: 0.0000165 + cached_token_cost: 0.00000825 output_token_cost: 0.000066 o1-mini: mode: chat max_completion_tokens: 128000 input_token_cost: 0.0000033 + cached_token_cost: 0.00000165 output_token_cost: 0.0000132 gpt-4o-mini: mode: chat @@ -308,8 +336,9 @@ providers: gpt-4o: mode: chat max_tokens: 128000 - input_token_cost: 0.000005 - output_token_cost: 0.000015 + input_token_cost: 0.0000025 + cached_token_cost: 0.00000125 + output_token_cost: 0.00001 gpt-4-turbo: mode: chat max_tokens: 128000 diff --git a/libs/core/llmstudio_core/providers/__init__.py b/libs/core/llmstudio_core/providers/__init__.py index 6aa6c4e7..330fe48e 100644 --- a/libs/core/llmstudio_core/providers/__init__.py +++ b/libs/core/llmstudio_core/providers/__init__.py @@ -2,7 +2,7 @@ from typing import Optional from llmstudio_core.providers.azure import AzureProvider -from llmstudio_core.providers.bedrock.provider import 
BedrockProvider +from llmstudio_core.providers.bedrock_converse import BedrockConverseProvider # from llmstudio_core.providers.ollama import OllamaProvider #TODO: adapt it from llmstudio_core.providers.openai import OpenAIProvider diff --git a/libs/core/llmstudio_core/providers/azure.py b/libs/core/llmstudio_core/providers/azure.py index 2dbd7307..f558f9d6 100644 --- a/libs/core/llmstudio_core/providers/azure.py +++ b/libs/core/llmstudio_core/providers/azure.py @@ -62,7 +62,25 @@ async def agenerate_client(self, request: ChatRequest) -> Any: return self.generate_client(request=request) def generate_client(self, request: ChatRequest) -> Any: - """Generate an AzureOpenAI client""" + """ + Generates an AzureOpenAI client for processing a chat request. + + This method prepares and configures the arguments required to create a client + request to AzureOpenAI's chat completions API. It determines model-specific + configurations (e.g., whether tools or functions are enabled) and combines + these with the base arguments for the API call. + + Args: + request (ChatRequest): The chat request object containing the model, + parameters, and other necessary details. + + Returns: + Any: The result of the chat completions API call. + + Raises: + ProviderError: If there is an issue with the API connection or an error + returned from the API. + """ self.is_llama = "llama" in request.model.lower() self.is_openai = "gpt" in request.model.lower() @@ -72,7 +90,6 @@ def generate_client(self, request: ChatRequest) -> Any: try: messages = self.prepare_messages(request) - # Prepare the optional tool-related arguments tool_args = {} if not self.is_llama and self.has_tools and self.is_openai: tool_args = { @@ -80,7 +97,6 @@ def generate_client(self, request: ChatRequest) -> Any: "tool_choice": "auto" if request.parameters.get("tools") else None, } - # Prepare the optional function-related arguments function_args = {} if not self.is_llama and self.has_functions and self.is_openai: function_args = { @@ -90,14 +106,12 @@ def generate_client(self, request: ChatRequest) -> Any: else None, } - # Prepare the base arguments base_args = { "model": request.model, "messages": messages, "stream": True, } - # Combine all arguments combined_args = { **base_args, **tool_args, @@ -116,13 +130,13 @@ def prepare_messages(self, request: ChatRequest): if self.is_llama and (self.has_tools or self.has_functions): user_message = self.convert_to_openai_format(request.chat_input) content = "<|begin_of_text|>" - content = self.add_system_message( + content = self.build_llama_system_message( user_message, content, request.parameters.get("tools"), request.parameters.get("functions"), ) - content = self.add_conversation(user_message, content) + content = self.build_llama_conversation(user_message, content) return [{"role": "user", "content": content}] else: return ( @@ -139,6 +153,20 @@ async def aparse_response( yield chunk def parse_response(self, response: AsyncGenerator, **kwargs) -> Any: + """ + Processes a generator response and yields processed chunks. + + If `is_llama` is True and tools or functions are enabled, it processes the response + using `handle_tool_response`. Otherwise, it processes each chunk and yields only those + containing "choices". + + Args: + response (Generator): The response generator to process. + **kwargs: Additional arguments for tool handling. + + Yields: + Any: Processed response chunks. 
+ """ if self.is_llama and (self.has_tools or self.has_functions): for chunk in self.handle_tool_response(response, **kwargs): if chunk: @@ -388,9 +416,25 @@ def convert_to_openai_format(self, message: Union[str, list]) -> list: return [{"role": "user", "content": message}] return message - def add_system_message( + def build_llama_system_message( self, openai_message: list, llama_message: str, tools: list, functions: list ) -> str: + """ + Builds a complete system message for Llama based on OpenAI's message, tools, and functions. + + If a system message is present in the OpenAI message, it is included in the result. + Otherwise, a default system message is used. Additional tool and function instructions + are appended if provided. + + Args: + openai_message (list): List of OpenAI messages. + llama_message (str): The message to prepend to the system message. + tools (list): List of tools to include in the system message. + functions (list): List of functions to include in the system message. + + Returns: + str: The formatted system message combined with Llama message. + """ system_message = "" system_message_found = False for message in openai_message: @@ -407,15 +451,31 @@ def add_system_message( """ if tools: - system_message = system_message + self.add_tool_instructions(tools) + system_message = system_message + self.build_tool_instructions(tools) if functions: - system_message = system_message + self.add_function_instructions(functions) + system_message = system_message + self.build_function_instructions( + functions + ) end_tag = "\n<|eot_id|>" return llama_message + system_message + end_tag - def add_tool_instructions(self, tools: list) -> str: + def build_tool_instructions(self, tools: list) -> str: + """ + Builds a detailed instructional prompt for tools available to the assistant. + + This function generates a message describing the available tools, focusing on tools + of type "function." It explains to the LLM how to use each tool and provides an example of the + correct response format for function calls. + + Args: + tools (list): A list of tool dictionaries, where each dictionary contains tool + details such as type, function name, description, and parameters. + + Returns: + str: A formatted string detailing the tool instructions and usage examples. + """ tool_prompt = """ You have access to the following tools: """ @@ -449,7 +509,21 @@ def add_tool_instructions(self, tools: list) -> str: return tool_prompt - def add_function_instructions(self, functions: list) -> str: + def build_function_instructions(self, functions: list) -> str: + """ + Builds a detailed instructional prompt for available functions. + + This method creates a message describing the functions accessible to the assistant. + It includes the function name, description, and required parameters, along with + specific guidelines for calling functions. + + Args: + functions (list): A list of function dictionaries, each containing details such as + name, description, and parameters. + + Returns: + str: A formatted string with instructions on using the provided functions. + """ function_prompt = """ You have access to the following functions: """ @@ -479,35 +553,60 @@ def add_function_instructions(self, functions: list) -> str: """ return function_prompt - def add_conversation(self, openai_message: list, llama_message: str) -> str: + def build_llama_conversation(self, openai_message: list, llama_message: str) -> str: + """ + Appends the OpenAI message to the Llama message while formatting OpenAI messages. 
+
+        This function iterates through a list of OpenAI messages and formats them for inclusion
+        in a Llama message. It handles user messages that might include nested content (lists of
+        messages) by safely evaluating the content. System messages are skipped.
+
+        Args:
+            openai_message (list): A list of dictionaries representing the OpenAI messages. Each
+                dictionary should have "role" and "content" keys.
+            llama_message (str): The initial Llama message to which the conversation is appended.
+
+        Returns:
+            str: The Llama message with the conversation appended.
+        """
        conversation_parts = []
        for message in openai_message:
            if message["role"] == "system":
                continue
            elif message["role"] == "user" and isinstance(message["content"], str):
                try:
-                    # Attempt to safely evaluate the string to a Python object
                    content_as_list = ast.literal_eval(message["content"])
                    if isinstance(content_as_list, list):
-                        # If the content is a list, process each nested message
                        for nested_message in content_as_list:
                            conversation_parts.append(
                                self.format_message(nested_message)
                            )
                    else:
-                        # If the content is not a list, append it directly
                        conversation_parts.append(self.format_message(message))
                except (ValueError, SyntaxError):
-                    # If evaluation fails or content is not a list/dict string, append the message directly
                    conversation_parts.append(self.format_message(message))
            else:
-                # For all other messages, use the existing formatting logic
                conversation_parts.append(self.format_message(message))
 
        return llama_message + "".join(conversation_parts)
 
    def format_message(self, message: dict) -> str:
-        """Format a single message for the conversation."""
+        """
+        Formats a single message dictionary into a structured string for a conversation.
+
+        The formatting depends on the content of the message, such as tool calls,
+        function calls, or simple user/assistant messages. Each type of message
+        is formatted with specific headers and tags.
+
+        Args:
+            message (dict): A dictionary containing message details. Expected keys
+                include "role", "content", and optionally "tool_calls",
+                "tool_call_id", or "function_call".
+
+        Returns:
+            str: A formatted string representing the message. Returns an empty
+                string if the message cannot be formatted.
+        """
        if "tool_calls" in message:
            for tool_call in message["tool_calls"]:
                function_name = tool_call["function"]["name"]
diff --git a/libs/core/llmstudio_core/providers/bedrock/provider.py b/libs/core/llmstudio_core/providers/bedrock/provider.py
deleted file mode 100644
index 7087dac2..00000000
--- a/libs/core/llmstudio_core/providers/bedrock/provider.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from typing import Any, AsyncGenerator, Coroutine, Generator
-
-from llmstudio_core.providers.bedrock.anthropic import BedrockAnthropicProvider
-from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider
-
-
-@provider
-class BedrockProvider(ProviderCore):
-    def __init__(self, config, **kwargs):
-        super().__init__(config, **kwargs)
-        self.kwargs = kwargs
-        self.selected_model = None
-
-    def _get_provider(self, model):
-        if "anthropic." in model:
-            return BedrockAnthropicProvider(config=self.config, **self.kwargs)
-
-        raise ValueError(f" provider is not yet supported.")
-
-    @staticmethod
-    def _provider_config_name():
-        return "bedrock"
-
-    def validate_request(self, request: ChatRequest):
-        return ChatRequest(**request)
-
-    async def agenerate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Any]:
-        self.selected_model = self._get_provider(request.model)
-        return await self.selected_model.agenerate_client(request)
-
-    def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator]:
-        self.selected_model = self._get_provider(request.model)
-        return self.selected_model.generate_client(request=request)
-
-    async def aparse_response(
-        self, response: Any, **kwargs
-    ) -> AsyncGenerator[Any, None]:
-        result = await self.selected_model.aparse_response(response=response, **kwargs)
-        for chunk in result:
-            yield chunk
-
-    def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
-        return self.selected_model.parse_response(response=response, **kwargs)
diff --git a/libs/core/llmstudio_core/providers/bedrock/anthropic.py b/libs/core/llmstudio_core/providers/bedrock_converse.py
similarity index 72%
rename from libs/core/llmstudio_core/providers/bedrock/anthropic.py
rename to libs/core/llmstudio_core/providers/bedrock_converse.py
index f2b3cfc1..6afeffcf 100644
--- a/libs/core/llmstudio_core/providers/bedrock/anthropic.py
+++ b/libs/core/llmstudio_core/providers/bedrock_converse.py
@@ -1,5 +1,7 @@
+import base64
 import json
 import os
+import re
 import time
 import uuid
 from typing import (
@@ -14,6 +16,7 @@
 )
 
 import boto3
+import requests
 from llmstudio_core.exceptions import ProviderError
 from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider
 from llmstudio_core.utils import OpenAIToolFunction
@@ -23,6 +26,7 @@
     ChoiceDelta,
     ChoiceDeltaToolCall,
     ChoiceDeltaToolCallFunction,
+    CompletionUsage,
 )
 
 from pydantic import ValidationError
@@ -30,7 +34,7 @@
 
 
 @provider
-class BedrockAnthropicProvider(ProviderCore):
+class BedrockConverseProvider(ProviderCore):
     def __init__(self, config, **kwargs):
         super().__init__(config, **kwargs)
         self._client = boto3.client(
@@ -46,17 +50,17 @@ def __init__(self, config, **kwargs):
 
     @staticmethod
     def _provider_config_name():
-        return "bedrock-antropic"
+        return "bedrock"
 
     def validate_request(self, request: ChatRequest):
         return ChatRequest(**request)
 
     async def agenerate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Any]:
-        """Generate an AWS Bedrock client"""
+        """Generate an AWS Bedrock Converse client"""
         return self.generate_client(request=request)
 
     def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator]:
-        """Generate an AWS Bedrock client"""
+        """Generate an AWS Bedrock Converse client"""
         try:
             messages, system_prompt = self._process_messages(request.chat_input)
             tools = self._process_tools(request.parameters)
@@ -83,7 +87,9 @@ def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator
     async def aparse_response(
         self, response: Any, **kwargs
     ) -> AsyncGenerator[Any, None]:
-        return self.parse_response(response=response, **kwargs)
+        result = self.parse_response(response=response, **kwargs)
+        for chunk in result:
+            yield chunk
 
     def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
         tool_name = None
@@ -222,6 +228,22 @@ def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
                 )
                 yield final_chunk.model_dump()
 
+            elif chunk.get("metadata"):
+                usage = chunk["metadata"].get("usage")
+                final_stream_chunk = ChatCompletionChunk(
+                    id=str(uuid.uuid4()),
+                    choices=[],
+                    created=int(time.time()),
+                    model=kwargs.get("request").model,
+                    object="chat.completion.chunk",
+                    usage=CompletionUsage(
+                        completion_tokens=usage["outputTokens"],
+                        prompt_tokens=usage["inputTokens"],
+                        total_tokens=usage["totalTokens"],
+                    ),
+                )
+                yield final_stream_chunk.model_dump()
+
     @staticmethod
     def _process_messages(
         chat_input: Union[str, List[Dict[str, str]]]
@@ -257,6 +279,34 @@ def _process_messages(
                         }
                     )
                     messages.append(tool_use)
+                elif isinstance(message.get("content"), list):
+                    converse_content_list = []
+                    for content in message.get("content"):
+                        converse_content = {}
+                        if content.get("type") == "text":
+                            converse_content["text"] = content.get("text")
+                        elif content.get("type") == "image_url":
+                            image_url = content.get("image_url")["url"]
+                            bytes_image = BedrockConverseProvider._get_image_bytes(
+                                image_url
+                            )
+                            format = (
+                                BedrockConverseProvider._get_img_format_from_bytes(
+                                    bytes_image
+                                )
+                            )
+                            converse_content["image"] = {
+                                "format": format,
+                                "source": {"bytes": bytes_image},
+                            }
+                        converse_content_list.append(converse_content)
+
+                    messages.append(
+                        {
+                            "role": message.get("role"),
+                            "content": converse_content_list,
+                        }
+                    )
                 else:
                     messages.append(
                         {
@@ -284,6 +334,67 @@ def _process_messages(
 
         return messages, system_prompt
 
+    @staticmethod
+    def _b64_data_url_to_bytes(b64_data_url: str) -> bytes:
+        """
+        Extracts and decodes Base64 image data from a 'data:image/...;base64,...' data URL.
+        Returns the raw image bytes.
+        """
+        if not b64_data_url.startswith("data:image/"):
+            raise ValueError("Invalid Base64 image URL")
+
+        base64_data = re.sub(r"^data:image/[^;]+;base64,", "", b64_data_url)
+
+        try:
+            return base64.b64decode(base64_data)
+        except Exception as e:
+            raise ValueError(
+                f"Failed to decode Base64: {e} ; For Base64 Data Url: {b64_data_url}"
+            )
+
+    @staticmethod
+    def _get_img_format_from_bytes(image_bytes: bytes) -> str:
+        """
+        Determines the image format from raw image bytes using file signatures (magic numbers).
+        """
+        if image_bytes.startswith(b"\xFF\xD8\xFF"):
+            return "jpeg"
+        elif image_bytes.startswith(b"\x89PNG\r\n\x1A\n"):
+            return "png"
+        elif image_bytes.startswith(b"GIF87a") or image_bytes.startswith(b"GIF89a"):
+            return "gif"
+        elif (
+            image_bytes.startswith(b"\x52\x49\x46\x46") and image_bytes[8:12] == b"WEBP"
+        ):
+            return "webp"
+        elif image_bytes.startswith(b"\x49\x49\x2A\x00") or image_bytes.startswith(
+            b"\x4D\x4D\x00\x2A"
+        ):
+            return "tiff"
+        else:
+            raise ValueError("Unknown image format")
+
+    @staticmethod
+    def _get_image_bytes(image_url: str) -> bytes:
+        """
+        Returns the raw bytes of the image referenced by `image_url`.
+        - If the URL is already in 'data:image/...;base64,...' format, the Base64 payload is decoded and returned.
+        - If it is a regular http(s) URL, the image is downloaded and its bytes are returned.
+ """ + if image_url.startswith("data:image/"): + return BedrockConverseProvider._b64_data_url_to_bytes(image_url) + + elif image_url.startswith(("http://", "https://")): + response = requests.get(image_url) + if response.status_code != 200: + raise ValueError(f"Failed to download image: {response.status_code}") + + image_bytes = response.content + return image_bytes + + else: + raise ValueError("Invalid image URL format") + @staticmethod def _process_tools(parameters: dict) -> Optional[Dict]: if parameters.get("tools") is None and parameters.get("functions") is None: diff --git a/libs/core/llmstudio_core/providers/data_structures.py b/libs/core/llmstudio_core/providers/data_structures.py new file mode 100644 index 00000000..85c9482e --- /dev/null +++ b/libs/core/llmstudio_core/providers/data_structures.py @@ -0,0 +1,209 @@ +import copy +from typing import Any, List, Optional, Union + +from openai.types.chat import ChatCompletion, ChatCompletionChunk +from pydantic import BaseModel + + +class Metrics(BaseModel): + input_tokens: int + """Number of tokens in the input.""" + + output_tokens: int + """Number of tokens in the output.""" + + reasoning_tokens: int + """Number of reasoning tokens used by the model.""" + + total_tokens: int + """Total token count (input + output + reasoning).""" + + cached_tokens: int + """Number of cached tokens which will lower the price of input.""" + + cost_usd: float + """Total cost of the response in USD (input + output + reasoning - cached).""" + + latency_s: float + """Total time taken for the response, in seconds.""" + + time_to_first_token_s: Optional[float] = None + """Time to receive the first token, in seconds.""" + + inter_token_latency_s: Optional[float] = None + """Average time between tokens, in seconds. Defaults to None if not provided.""" + + tokens_per_second: Optional[float] = None + """Processing rate of tokens per second. Defaults to None if not provided.""" + + def __getitem__(self, key: str) -> Any: + """ + Allows subscriptable access to class fields. + + Parameters + ---------- + key : str + The name of the field to retrieve. + + Returns + ------- + Any + The value of the specified field. + + Raises + ------ + KeyError + If the key does not exist. + """ + try: + return getattr(self, key) + except AttributeError: + raise KeyError(f"'{key}' not found in MetricsStream.") + + def __iter__(self): + """ + Allows iteration over the class fields as key-value pairs. + """ + return iter(self.model_dump().items()) + + def __len__(self): + """ + Returns the number of fields in the class. + """ + return len(self.model_fields) + + def keys(self): + """ + Returns the keys of the fields. + """ + return self.model_fields.keys() + + def values(self): + """ + Returns the values of the fields. + """ + return self.model_dump().values() + + def items(self): + """ + Returns the key-value pairs of the fields. + """ + return self.model_dump().items() + + +class ChatCompletionLLMstudioBase: + """ + Base class to share the methods between different ChatCompletionLLMstudio classes. + """ + + def clean_print(self): + """ + Custom representation of the class to prevent large fields from bloating the output. + Ensures missing fields are handled gracefully without errors. + """ + data = copy.deepcopy(self.model_dump()) + + def clean_large_fields(d): + """ + Recursively traverses the dictionary to replace large image Base64 data + with a placeholder while ensuring missing fields do not cause errors. 
+ """ + for key, value in d.items(): + if isinstance(value, list): + for item in value: + if isinstance(item, dict): + # Handle image_url directly under chat_input or context + if "image_url" in item and isinstance( + item["image_url"], dict + ): + if "url" in item["image_url"] and isinstance( + item["image_url"]["url"], str + ): + if item["image_url"]["url"].startswith( + "data:image/" + ): + item["image_url"][ + "url" + ] = "