From 9a982e7993683588d421452d23f95c12920c6f98 Mon Sep 17 00:00:00 2001 From: Xinyuan Guo Date: Mon, 28 Apr 2025 10:02:18 -0400 Subject: [PATCH 1/5] MLOB-2635 add responses endpoint apm span --- .../internal/openai/_endpoint_hooks.py | 54 +++++++++++++++++++ ddtrace/contrib/internal/openai/patch.py | 3 ++ 2 files changed, 57 insertions(+) diff --git a/ddtrace/contrib/internal/openai/_endpoint_hooks.py b/ddtrace/contrib/internal/openai/_endpoint_hooks.py index 903ab52240f..6baf8356c9e 100644 --- a/ddtrace/contrib/internal/openai/_endpoint_hooks.py +++ b/ddtrace/contrib/internal/openai/_endpoint_hooks.py @@ -715,3 +715,57 @@ def _record_response(self, pin, integration, span, args, kwargs, resp, error): else: span.set_metric("openai.response.total_bytes", getattr(resp, "total_bytes", 0)) return resp + + +class _ResponseHook(_EndpointHook): + _request_arg_params = ("api_key", "api_base", "api_type", "request_id", "api_version", "organization") + _request_kwarg_params = ( + "model", + "include", + "instructions", + "max_output_tokens", + "metadata", + "parallel_tool_calls", + "previous_response_id", + "reasoning", + "service_tier", + "store", + "stream", + "temperature", + "text", + "tool_choice", + "tools", + "top_p", + "truncation", + "user", + ) + _response_attrs = ("created_at", "id", "model", "tools") + ENDPOINT_NAME = "responses" + HTTP_METHOD_TYPE = "POST" + OPERATION_ID = "createResponse" + + def _record_request(self, pin, integration, instance, span, args, kwargs): + super()._record_request(pin, integration, instance, span, args, kwargs) + for idx, m in enumerate(kwargs.get("input", [])): + span._set_ctx_item("llmobs.response.input", m) + if parse_version(OPENAI_VERSION) >= (1, 26) and kwargs.get("stream"): + if kwargs.get("stream_options", {}).get("include_usage", None) is not None: + # Only perform token chunk auto-extraction if this option is not explicitly set + return + span._set_ctx_item("_dd.auto_extract_token_chunk", True) + stream_options = kwargs.get("stream_options", {}) + stream_options["include_usage"] = True + kwargs["stream_options"] = stream_options + print("record request is called") + + def _record_response(self, pin, integration, span, args, kwargs, resp, error): + resp = super()._record_response(pin, integration, span, args, kwargs, resp, error) + if kwargs.get("stream") and error is None: + return self._handle_streamed_response(integration, span, kwargs, resp, is_completion=False) + integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=resp, operation="chat") + if not resp: + return + span._set_ctx_items({"llmobs.response.output": kwargs.get("output", [])}) + integration.record_usage(span, resp.usage) + print("record response is called") + return resp diff --git a/ddtrace/contrib/internal/openai/patch.py b/ddtrace/contrib/internal/openai/patch.py index 04e442523b2..7924b6203c5 100644 --- a/ddtrace/contrib/internal/openai/patch.py +++ b/ddtrace/contrib/internal/openai/patch.py @@ -72,6 +72,9 @@ def get_version(): "delete": _endpoint_hooks._FileDeleteHook, "retrieve_content": _endpoint_hooks._FileDownloadHook, }, + "responses.Responses": { + "create": _endpoint_hooks._ResponseHook, + }, } OPENAI_WITH_RAW_RESPONSE_ARG = "_dd.with_raw_response" From 3b21b5569422b731498f14aab5e525bf23888a94 Mon Sep 17 00:00:00 2001 From: Xinyuan Guo Date: Mon, 28 Apr 2025 16:46:51 -0400 Subject: [PATCH 2/5] collect tools metadata --- .../internal/openai/_endpoint_hooks.py | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/ddtrace/contrib/internal/openai/_endpoint_hooks.py b/ddtrace/contrib/internal/openai/_endpoint_hooks.py index 6baf8356c9e..08f62a1c262 100644 --- a/ddtrace/contrib/internal/openai/_endpoint_hooks.py +++ b/ddtrace/contrib/internal/openai/_endpoint_hooks.py @@ -742,12 +742,18 @@ class _ResponseHook(_EndpointHook): _response_attrs = ("created_at", "id", "model", "tools") ENDPOINT_NAME = "responses" HTTP_METHOD_TYPE = "POST" - OPERATION_ID = "createResponse" + OPERATION_ID = "createResponseCompletion" def _record_request(self, pin, integration, instance, span, args, kwargs): super()._record_request(pin, integration, instance, span, args, kwargs) - for idx, m in enumerate(kwargs.get("input", [])): - span._set_ctx_item("llmobs.response.input", m) + + input_data = kwargs.get("input", []) + if input_data: + if isinstance(input_data, str): + input_data = [input_data] + + span._set_ctx_item("llmobs.response.input", input_data) + if parse_version(OPENAI_VERSION) >= (1, 26) and kwargs.get("stream"): if kwargs.get("stream_options", {}).get("include_usage", None) is not None: # Only perform token chunk auto-extraction if this option is not explicitly set @@ -756,7 +762,6 @@ def _record_request(self, pin, integration, instance, span, args, kwargs): stream_options = kwargs.get("stream_options", {}) stream_options["include_usage"] = True kwargs["stream_options"] = stream_options - print("record request is called") def _record_response(self, pin, integration, span, args, kwargs, resp, error): resp = super()._record_response(pin, integration, span, args, kwargs, resp, error) @@ -764,8 +769,23 @@ def _record_response(self, pin, integration, span, args, kwargs, resp, error): return self._handle_streamed_response(integration, span, kwargs, resp, is_completion=False) integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=resp, operation="chat") if not resp: - return - span._set_ctx_items({"llmobs.response.output": kwargs.get("output", [])}) + return resp + span._set_ctx_item("llmobs.response.output", resp.output) + if getattr(resp, "tools", None): + response_tools = [] + for tool in resp.tools: + tool_dict = {} + + if hasattr(tool, "type"): + tool_dict["type"] = getattr(tool, "type") + if hasattr(tool, "name"): + tool_dict["name"] = getattr(tool, "name") + if tool_dict: + response_tools.append(tool_dict) + + if response_tools: + span.set_tag("openai.response.tools", response_tools) + + integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=resp, operation="responses") integration.record_usage(span, resp.usage) - print("record response is called") return resp From 6a3c5e2719fff63b7c2d9a8abd67d4b38b78bdc8 Mon Sep 17 00:00:00 2001 From: Xinyuan Guo Date: Tue, 29 Apr 2025 17:48:19 -0400 Subject: [PATCH 3/5] fix formatting --- .../openai/cassettes/v1/response_create.yaml | 97 +++++++++++++++++++ tests/contrib/openai/test_openai_llmobs.py | 80 ++++++++------- ...penai_llmobs.test_response_completion.json | 44 +++++++++ 3 files changed, 187 insertions(+), 34 deletions(-) create mode 100644 tests/contrib/openai/cassettes/v1/response_create.yaml create mode 100644 tests/snapshots/tests.contrib.openai.test_openai_llmobs.test_response_completion.json diff --git a/tests/contrib/openai/cassettes/v1/response_create.yaml b/tests/contrib/openai/cassettes/v1/response_create.yaml new file mode 100644 index 00000000000..911149d1381 --- /dev/null +++ b/tests/contrib/openai/cassettes/v1/response_create.yaml @@ -0,0 +1,97 @@ +interactions: +- request: + body: '{"input":[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"Who + won the world series in 2020?"},{"role":"assistant","content":"The Los Angeles + Dodgers won the World Series in 2020."},{"role":"user","content":"Where was + it played?"}],"model":"gpt-4.1","max_output_tokens":100,"top_p":0.9,"user":"ddtrace-test"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '344' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.76.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.76.0 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.10 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3RTwW7bMAy95ysInZvAcpykzm1YsWHAgB1WbIdhMBiZjrXJkiFRXYsi/z5Ydpxm + ay9B/B5JPT6SzwsAoWuxB+Ep9NX2VkrZkNwV6+xWlvJ2t6EdNvKwa9ZlqVSmcswLmRfbYkPNWoqb + oYA7/CLF5yLOBhpx5QmZ6goHTu6KTbnJd+td4gIjxzDkKNf1hpjqMYm8d17swUZjEqDtOaKqiVGb + cM0G9lGxdvYK7/CxcpH7yBW735RImWUj52oyw9PHnpfFSi7zLN8ss2Ipi6mhlCj28GMBAPCcfmen + unA8G5Vnt6UajDpQc8i3tN7IJt9tUb5qVKrBTz2lKhQCHulCvOVIIpWzTPYi6aWsq7LnnumR5+wU + gNY6xrNPP35ekSl8D+K+JcizPIPvzpsavpLXFOAPBugNPlENyPDRuAPBZ90QfNBkatAW3nmj7ZGd + vYF7esSwgrtIwA64JXj/5dunu6UsoUdbU6fVTYLJsvYE4fJGO1RDBm51AEuRPRoImgk8ckseuEWb + clvXEQTGWscugGsS2KNnrXSPrO0RmLALKzF3eZr+zY0L70yyDEPQgdHyGDwEpiDRo0djyFTsnKkU + mrR67CONtKcH7WKozltfpf2YV9ATBme1PYr9NCtBTeM8vwga5h67Dv3TBC4ATuOBkH/QiirWNJyD + qKnBaMahisDOD9obNGEUw9T15JFjwuUqm9A01+n1xvkOL98vtibFzd2PAsamW6fV6FJkJ2biskOC + XV/1Yg/Zqhy/fbQqLVqSrQMezPm4Y1r5WZC2V/e5Wd/8j784+lm2QtVSfUnMRulT9r9nX2xfI16r + O0/srdLsGM2FLMvZrRimMdXsUdGSKUyz6oixRsbhodPi9BcAAP//AwBg/xKFcwUAAA== + headers: + CF-RAY: + - 9380fee08936c5a6-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 29 Apr 2025 18:52:18 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=ZBZ_gGawWFZR6g_amHJEvmq6eTrwHn.6T0arKW9gVW0-1745952738-1.0.1.1-P.TmPJmOrF49vIO_.9StvI17Ray1uycDvUw73C88oGotVOmLTyPooIHWvf0v2Q.CuZw8REvaclx9qcKBnHXVhxGr_2MsX2_o8TMbSVj4Wb8; + path=/; expires=Tue, 29-Apr-25 19:22:18 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=EOcZHViEbTa_tPSVA8uhDzehfVRpUdeTnoMPw8C9f3I-1745952738788-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-staging + openai-processing-ms: + - '1371' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-request-id: + - req_0320a94becf7871df72beab48c3ecc6b + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/openai/test_openai_llmobs.py b/tests/contrib/openai/test_openai_llmobs.py index 4b3f2036c4b..1298bcab3dc 100644 --- a/tests/contrib/openai/test_openai_llmobs.py +++ b/tests/contrib/openai/test_openai_llmobs.py @@ -245,40 +245,6 @@ def test_chat_completion_azure_proxy( ) assert mock_llmobs_writer.enqueue.call_count == 0 - @pytest.mark.skipif( - parse_version(openai_module.version.VERSION) >= (1, 60), - reason="latest openai versions use modified azure requests", - ) - def test_chat_completion_azure( - self, openai, azure_openai_config, ddtrace_global_config, mock_llmobs_writer, mock_tracer - ): - input_messages = [{"role": "user", "content": "What's the weather like in NYC right now?"}] - expected_output = "I'm sorry, as an AI language model, I do not have real-time information. Please check" - with get_openai_vcr(subdirectory_name="v1").use_cassette("azure_chat_completion.yaml"): - azure_client = openai.AzureOpenAI( - api_version=azure_openai_config["api_version"], - azure_endpoint=azure_openai_config["azure_endpoint"], - azure_deployment=azure_openai_config["azure_deployment"], - api_key=azure_openai_config["api_key"], - ) - resp = azure_client.chat.completions.create( - model="gpt-35-turbo", messages=input_messages, temperature=0, n=1, max_tokens=20, user="ddtrace-test" - ) - span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name=resp.model, - model_provider="azure_openai", - input_messages=input_messages, - output_messages=[{"role": "assistant", "content": expected_output}], - metadata={"temperature": 0, "max_tokens": 20, "n": 1, "user": "ddtrace-test"}, - token_metrics={"input_tokens": 18, "output_tokens": 20, "total_tokens": 38}, - tags={"ml_app": "", "service": "tests.contrib.openai"}, - ) - ) - @pytest.mark.skipif( parse_version(openai_module.version.VERSION) >= (1, 60), reason="latest openai versions use modified azure requests", @@ -682,6 +648,52 @@ def test_deepseek_as_provider(self, openai, mock_llmobs_writer, mock_tracer): assert span_event["meta"]["model_provider"] == "deepseek" assert span_event["meta"]["model_name"] == "deepseek-chat" + @mock.patch("openai._base_client.SyncAPIClient.post") + def test_response_completion_proxy( + self, mock_completions_post, openai, ddtrace_global_config, mock_llmobs_writer, mock_tracer + ): + """Ensure llmobs records are not emitted for response endpoints when the base_url is specified.""" + model = "gpt-4.1" + input_messages = multi_message_input + client = openai.OpenAI(base_url="http://0.0.0.0:4000") + client.responses.create( + model=model, input=input_messages, top_p=0.9, max_output_tokens=100, user="ddtrace-test" + ) + assert mock_llmobs_writer.enqueue.call_count == 0 + + @pytest.mark.snapshot(token="tests.contrib.openai.test_openai_llmobs.test_response_completion") + def test_response_completion(self, openai, ddtrace_global_config, mock_llmobs_writer, mock_tracer): + """Ensure llmobs records are emitted for response completion endpoints when configured.""" + # Create a new cassette for this test + with get_openai_vcr(subdirectory_name="v1").use_cassette("response_create.yaml"): + model = "gpt-4.1" + input_messages = multi_message_input + client = openai.OpenAI() + client.responses.create( + model=model, input=input_messages, top_p=0.9, max_output_tokens=100, user="ddtrace-test" + ) + span = mock_tracer.pop_traces()[0][0] + assert span.name == "openai.request" + assert span.resource == "createResponseCompletion" + assert span.get_tag("openai.request.model") == "gpt-4.1" + + @mock.patch("openai._base_client.SyncAPIClient.post") + def test_response_completion_azure_proxy( + self, mock_completions_post, openai, azure_openai_config, ddtrace_global_config, mock_llmobs_writer, mock_tracer + ): + input_messages = [ + {"role": "user", "content": "Where did the Los Angeles Dodgers play to win the world series in 2020?"} + ] + azure_client = openai.AzureOpenAI( + base_url="http://0.0.0.0:4000", + api_key=azure_openai_config["api_key"], + api_version=azure_openai_config["api_version"], + ) + azure_client.responses.create( + model="gpt-4.1", input=input_messages, temperature=0, max_output_tokens=100, user="ddtrace-test" + ) + assert mock_llmobs_writer.enqueue.call_count == 0 + @pytest.mark.parametrize( "ddtrace_global_config", diff --git a/tests/snapshots/tests.contrib.openai.test_openai_llmobs.test_response_completion.json b/tests/snapshots/tests.contrib.openai.test_openai_llmobs.test_response_completion.json new file mode 100644 index 00000000000..28a59eea727 --- /dev/null +++ b/tests/snapshots/tests.contrib.openai.test_openai_llmobs.test_response_completion.json @@ -0,0 +1,44 @@ +[[ + { + "name": "openai.request", + "service": "", + "resource": "createResponseCompletion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "llm", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "68112ff900000000", + "component": "openai", + "language": "python", + "openai.base_url": "https://api.openai.com/v1/", + "openai.organization.name": "datadog-staging", + "openai.request.client": "OpenAI", + "openai.request.endpoint": "/v1/responses", + "openai.request.max_output_tokens": "100", + "openai.request.method": "POST", + "openai.request.model": "gpt-4.1", + "openai.request.top_p": "0.9", + "openai.request.user": "ddtrace-test", + "openai.response.created_at": "1745952737.0", + "openai.response.id": "resp_68111fe174308191875e7af1b7f399cc0c2a24124645ef31", + "openai.response.model": "gpt-4.1-2025-04-14", + "openai.response.tools": "[]", + "openai.user.api_key": "sk-...key>", + "runtime-id": "a052b5091f1b4828ac710e8760bee8e5" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "openai.response.usage.input_tokens": 53, + "openai.response.usage.output_tokens": 46, + "openai.response.usage.total_tokens": 99, + "process_id": 8758 + }, + "duration": 27859000, + "start": 1745956857134697000 + }]] From 6366987e17c02f36c0e2af111c8ff3cfa9377112 Mon Sep 17 00:00:00 2001 From: Xinyuan Guo Date: Tue, 29 Apr 2025 17:53:16 -0400 Subject: [PATCH 4/5] adding back the test_chat_completion_azure --- tests/contrib/openai/test_openai_llmobs.py | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/contrib/openai/test_openai_llmobs.py b/tests/contrib/openai/test_openai_llmobs.py index 1298bcab3dc..be37600a11f 100644 --- a/tests/contrib/openai/test_openai_llmobs.py +++ b/tests/contrib/openai/test_openai_llmobs.py @@ -245,6 +245,40 @@ def test_chat_completion_azure_proxy( ) assert mock_llmobs_writer.enqueue.call_count == 0 + @pytest.mark.skipif( + parse_version(openai_module.version.VERSION) >= (1, 60), + reason="latest openai versions use modified azure requests", + ) + def test_chat_completion_azure( + self, openai, azure_openai_config, ddtrace_global_config, mock_llmobs_writer, mock_tracer + ): + input_messages = [{"role": "user", "content": "What's the weather like in NYC right now?"}] + expected_output = "I'm sorry, as an AI language model, I do not have real-time information. Please check" + with get_openai_vcr(subdirectory_name="v1").use_cassette("azure_chat_completion.yaml"): + azure_client = openai.AzureOpenAI( + api_version=azure_openai_config["api_version"], + azure_endpoint=azure_openai_config["azure_endpoint"], + azure_deployment=azure_openai_config["azure_deployment"], + api_key=azure_openai_config["api_key"], + ) + resp = azure_client.chat.completions.create( + model="gpt-35-turbo", messages=input_messages, temperature=0, n=1, max_tokens=20, user="ddtrace-test" + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name=resp.model, + model_provider="azure_openai", + input_messages=input_messages, + output_messages=[{"role": "assistant", "content": expected_output}], + metadata={"temperature": 0, "max_tokens": 20, "n": 1, "user": "ddtrace-test"}, + token_metrics={"input_tokens": 18, "output_tokens": 20, "total_tokens": 38}, + tags={"ml_app": "", "service": "tests.contrib.openai"}, + ) + ) + @pytest.mark.skipif( parse_version(openai_module.version.VERSION) >= (1, 60), reason="latest openai versions use modified azure requests", From f928b0f3436c62c2e2062727f7d1b71f393c335d Mon Sep 17 00:00:00 2001 From: Xinyuan Guo Date: Wed, 30 Apr 2025 09:56:10 -0400 Subject: [PATCH 5/5] remove azure test --- tests/contrib/openai/test_openai_llmobs.py | 17 ----------------- ..._openai_llmobs.test_response_completion.json | 2 +- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/tests/contrib/openai/test_openai_llmobs.py b/tests/contrib/openai/test_openai_llmobs.py index be37600a11f..1f45f1d16b1 100644 --- a/tests/contrib/openai/test_openai_llmobs.py +++ b/tests/contrib/openai/test_openai_llmobs.py @@ -711,23 +711,6 @@ def test_response_completion(self, openai, ddtrace_global_config, mock_llmobs_wr assert span.resource == "createResponseCompletion" assert span.get_tag("openai.request.model") == "gpt-4.1" - @mock.patch("openai._base_client.SyncAPIClient.post") - def test_response_completion_azure_proxy( - self, mock_completions_post, openai, azure_openai_config, ddtrace_global_config, mock_llmobs_writer, mock_tracer - ): - input_messages = [ - {"role": "user", "content": "Where did the Los Angeles Dodgers play to win the world series in 2020?"} - ] - azure_client = openai.AzureOpenAI( - base_url="http://0.0.0.0:4000", - api_key=azure_openai_config["api_key"], - api_version=azure_openai_config["api_version"], - ) - azure_client.responses.create( - model="gpt-4.1", input=input_messages, temperature=0, max_output_tokens=100, user="ddtrace-test" - ) - assert mock_llmobs_writer.enqueue.call_count == 0 - @pytest.mark.parametrize( "ddtrace_global_config", diff --git a/tests/snapshots/tests.contrib.openai.test_openai_llmobs.test_response_completion.json b/tests/snapshots/tests.contrib.openai.test_openai_llmobs.test_response_completion.json index 28a59eea727..cd0d1b852f6 100644 --- a/tests/snapshots/tests.contrib.openai.test_openai_llmobs.test_response_completion.json +++ b/tests/snapshots/tests.contrib.openai.test_openai_llmobs.test_response_completion.json @@ -1,7 +1,7 @@ [[ { "name": "openai.request", - "service": "", + "service": "tests.contrib.openai", "resource": "createResponseCompletion", "trace_id": 0, "span_id": 1,