Skip to content

Commit 5aa32d1

Browse files
quinna-hmabdinurwantsui
authored
fix(tracing): truncate long span attributes (#13270)
Truncate span resource name, tag key and tag values. Previously, a very large resource name would result in a runtime error during encoding. If any of these has more than 25000 chars, it is now truncated to 2500 chars (including the suffix `<truncated>...`). The agent will truncate based on the limits [here](https://docs.datadoghq.com/tracing/troubleshooting/?tab=java#data-volume-guidelines).

Resolves:
- #13221
- #6587

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [ ] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: Munir Abdinur <[email protected]>
Co-authored-by: wan <[email protected]>
1 parent 9930821 commit 5aa32d1

7 files changed

+87
-38
lines changed

ddtrace/_trace/_limits.py

+1
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44

55
MAX_SPAN_META_KEY_LEN = 200
66
MAX_SPAN_META_VALUE_LEN = 25000
7+
TRUNCATED_SPAN_ATTRIBUTE_LEN = 2500

ddtrace/internal/_encoding.pyx

+19-12
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ from ..constants import _ORIGIN_KEY as ORIGIN_KEY
2323
from .constants import SPAN_LINKS_KEY
2424
from .constants import SPAN_EVENTS_KEY
2525
from .constants import MAX_UINT_64BITS
26+
from .._trace._limits import MAX_SPAN_META_VALUE_LEN
27+
from .._trace._limits import TRUNCATED_SPAN_ATTRIBUTE_LEN
2628
from ..settings._agent import config as agent_config
2729

2830

@@ -94,6 +96,10 @@ cdef inline int array_prefix_size(stdint.uint32_t l):
9496
return 3
9597
return MSGPACK_ARRAY_LENGTH_PREFIX_SIZE
9698

99+
cdef inline object truncate_string(object string):
    # Shorten an oversized span attribute before it is encoded.
    #
    # Values longer than MAX_SPAN_META_VALUE_LEN are cut so that the result,
    # including the "<truncated>..." marker, is exactly
    # TRUNCATED_SPAN_ATTRIBUTE_LEN items long. Anything else (including None
    # and empty values) is returned unchanged.
    #
    # The marker must have the same type as the input: pack_text() also passes
    # bytes-like objects here, and concatenating a str marker onto bytes would
    # raise TypeError instead of truncating.
    if string and len(string) > MAX_SPAN_META_VALUE_LEN:
        suffix = "<truncated>..."
        if not isinstance(string, str):
            suffix = b"<truncated>..."
        return string[:TRUNCATED_SPAN_ATTRIBUTE_LEN - len(suffix)] + suffix
    return string
97103

98104
cdef inline int pack_bytes(msgpack_packer *pk, char *bs, Py_ssize_t l):
99105
cdef int ret
@@ -135,31 +141,35 @@ cdef inline int pack_text(msgpack_packer *pk, object text) except? -1:
135141

136142
if PyBytesLike_Check(text):
137143
L = len(text)
138-
if L > ITEM_LIMIT:
144+
if L > MAX_SPAN_META_VALUE_LEN:
139145
PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(text).tp_name)
146+
text = truncate_string(text)
147+
L = len(text)
140148
ret = msgpack_pack_raw(pk, L)
141149
if ret == 0:
142150
ret = msgpack_pack_raw_body(pk, <char *> text, L)
143151
return ret
144152

145153
if PyUnicode_Check(text):
154+
if len(text) > MAX_SPAN_META_VALUE_LEN:
155+
text = truncate_string(text)
146156
IF PY_MAJOR_VERSION >= 3:
147-
ret = msgpack_pack_unicode(pk, text, ITEM_LIMIT)
157+
ret = msgpack_pack_unicode(pk, text, MAX_SPAN_META_VALUE_LEN)
148158
if ret == -2:
149159
raise ValueError("unicode string is too large")
150160
ELSE:
151161
text = PyUnicode_AsEncodedString(text, "utf-8", NULL)
152162
L = len(text)
153-
if L > ITEM_LIMIT:
163+
if L > MAX_SPAN_META_VALUE_LEN:
154164
raise ValueError("unicode string is too large")
155165
ret = msgpack_pack_raw(pk, L)
156166
if ret == 0:
157167
ret = msgpack_pack_raw_body(pk, <char *> text, L)
168+
158169
return ret
159170

160171
raise TypeError("Unhandled text type: %r" % type(text))
161172

162-
163173
cdef class StringTable(object):
164174
cdef dict _table
165175
cdef stdint.uint32_t _next_id
@@ -226,7 +236,6 @@ cdef class ListStringTable(StringTable):
226236
cdef class MsgpackStringTable(StringTable):
227237
cdef msgpack_packer pk
228238
cdef int max_size
229-
cdef int _max_string_length
230239
cdef int _sp_len
231240
cdef stdint.uint32_t _sp_id
232241
cdef object _lock
@@ -238,7 +247,6 @@ cdef class MsgpackStringTable(StringTable):
238247
if self.pk.buf == NULL:
239248
raise MemoryError("Unable to allocate internal buffer.")
240249
self.max_size = max_size
241-
self._max_string_length = int(0.1*max_size)
242250
self.pk.length = MSGPACK_STRING_TABLE_LENGTH_PREFIX_SIZE
243251
self._sp_len = 0
244252
self._lock = threading.RLock()
@@ -254,15 +262,13 @@ cdef class MsgpackStringTable(StringTable):
254262
cdef insert(self, object string):
255263
cdef int ret
256264

257-
if len(string) > self._max_string_length:
258-
string = "<dropped string of length %d because it's too long (max allowed length %d)>" % (
259-
len(string), self._max_string_length
260-
)
265+
# Before inserting, truncate the string if it is greater than MAX_SPAN_META_VALUE_LEN
266+
string = truncate_string(string)
261267

262268
if self.pk.length + len(string) > self.max_size:
263269
raise ValueError(
264-
"Cannot insert '%s': string table is full (current size: %d, max size: %d)." % (
265-
string, self.pk.length, self.max_size
270+
"Cannot insert '%s': string table is full (current size: %d, size after insert: %d, max size: %d)." % (
271+
string, self.pk.length, (self.pk.length + len(string)), self.max_size
266272
)
267273
)
268274

@@ -1001,6 +1007,7 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase):
10011007
raise
10021008

10031009
cdef inline int _pack_string(self, object string) except? -1:
    # Truncate first so the string table is indexed by the shortened value,
    # then pack that table index as a uint32.
    shortened = truncate_string(string)
    return msgpack_pack_uint32(&self.pk, self._st._index(shortened))
10051012

10061013
cdef void * get_dd_origin_ref(self, str dd_origin):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
tracing: Fixes an issue where span attributes were not truncated before encoding, leading to a runtime error and causing spans to be dropped.
5+
Span resource names, tag keys and tag values longer than 25000 characters are now truncated to 2500 characters.

tests/integration/test_integration.py

-26
Original file line numberDiff line numberDiff line change
@@ -158,32 +158,6 @@ def test_payload_too_large():
158158
log.error.assert_not_called()
159159

160160

161-
@skip_if_testagent
162-
@pytest.mark.subprocess(
163-
env=dict(
164-
DD_TRACE_API_VERSION="v0.5",
165-
DD_TRACE_WRITER_BUFFER_SIZE_BYTES=str(FOUR_KB),
166-
)
167-
)
168-
def test_resource_name_too_large():
169-
import pytest
170-
171-
from ddtrace.trace import tracer as t
172-
from tests.integration.test_integration import FOUR_KB
173-
174-
assert t._span_aggregator.writer._buffer_size == FOUR_KB
175-
s = t.trace("operation", service="foo")
176-
# Maximum string length is set to 10% of the maximum buffer size
177-
s.resource = "B" * int(0.1 * FOUR_KB + 1)
178-
try:
179-
s.finish()
180-
except ValueError:
181-
pytest.fail()
182-
encoded_spans, size = t._span_aggregator.writer._encoder.encode()
183-
assert size == 1
184-
assert b"<dropped string of length 410 because it's too long (max allowed length 409)>" in encoded_spans
185-
186-
187161
@parametrize_with_all_encodings
188162
def test_large_payload_is_sent_without_warning_logs():
189163
import mock

tests/integration/test_integration_snapshots.py

+10
Original file line numberDiff line numberDiff line change
@@ -280,3 +280,13 @@ def test_setting_span_tags_and_metrics_generates_no_error_logs():
280280
s.set_metric("number2", 12.0)
281281
s.set_metric("number3", "1")
282282
s.finish()
283+
284+
285+
@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
@pytest.mark.snapshot()
def test_encode_span_with_large_string_attributes(encoding):
    """Snapshot a span with very long string attributes for each trace API version.

    The span name is exactly 25000 characters, the resource and the tag key
    are 25001 characters each, and the tag value is 2000 characters.
    """
    from ddtrace import tracer

    with override_global_config(dict(_trace_api=encoding)), tracer.trace(
        name="a" * 25000, resource="b" * 25001
    ) as span:
        span.set_tag(key="c" * 25001, value="d" * 2000)

tests/snapshots/tests.integration.test_integration_snapshots.test_encode_span_with_large_string_attributes[v0.4].json

+26
Large diffs are not rendered by default.

tests/snapshots/tests.integration.test_integration_snapshots.test_encode_span_with_large_string_attributes[v0.5].json

+26
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)