fix: Change thread local context to allow overlapped scopes

bantonsson · bantonsson · commit aacf87d288c5 · 2025-03-04T09:10:24.000+01:00
diff --git a/opentelemetry/CHANGELOG.md b/opentelemetry/CHANGELOG.md
@@ -10,6 +10,7 @@
 - *Breaking* Changed value type of `Baggage` from `Value` to `StringValue`
 - Updated `Baggage` constants to reflect latest standard (`MAX_KEY_VALUE_PAIRS` - 180 -> 64, `MAX_BYTES_FOR_ONE_PAIR` - removed) and increased insert performance see #[2284](https://github.com/open-telemetry/opentelemetry-rust/pull/2284).
 - *Breaking* Align `Baggage.remove()` signature with `.get()` to take the key as a reference
+- Changed `Context` to use a stack to properly handle out of order dropping of `ContextGuard`. This imposes a limit of `65535` nested contexts on a single thread. See #[2378](https://github.com/open-telemetry/opentelemetry-rust/pull/2378) and #[1887](https://github.com/open-telemetry/opentelemetry-rust/issues/1887).
 
 ## 0.28.0
 
diff --git a/opentelemetry/src/context.rs b/opentelemetry/src/context.rs
@@ -1,3 +1,4 @@
+use crate::otel_warn;
 #[cfg(feature = "trace")]
 use crate::trace::context::SynchronizedSpan;
 use std::any::{Any, TypeId};
@@ -9,7 +10,7 @@ use std::marker::PhantomData;
 use std::sync::Arc;
 
 thread_local! {
-    static CURRENT_CONTEXT: RefCell<Context> = RefCell::new(Context::default());
+    static CURRENT_CONTEXT: RefCell<ContextStack> = RefCell::new(ContextStack::default());
 }
 
 /// An execution-scoped collection of values.
@@ -122,7 +123,7 @@ impl Context {
     /// Note: This function will panic if you attempt to attach another context
     /// while the current one is still borrowed.
     pub fn map_current<T>(f: impl FnOnce(&Context) -> T) -> T {
-        CURRENT_CONTEXT.with(|cx| f(&cx.borrow()))
+        CURRENT_CONTEXT.with(|cx| cx.borrow().map_current_cx(f))
     }
 
     /// Returns a clone of the current thread's context with the given value.
@@ -298,12 +299,10 @@ impl Context {
     /// assert_eq!(Context::current().get::<ValueA>(), None);
     /// ```
     pub fn attach(self) -> ContextGuard {
-        let previous_cx = CURRENT_CONTEXT
-            .try_with(|current| current.replace(self))
-            .ok();
+        let cx_id = CURRENT_CONTEXT.with(|cx| cx.borrow_mut().push(self));
 
         ContextGuard {
-            previous_cx,
+            cx_pos: cx_id,
             _marker: PhantomData,
         }
     }
@@ -344,17 +343,19 @@ impl fmt::Debug for Context {
 }
 
 /// A guard that resets the current context to the prior context when dropped.
-#[allow(missing_debug_implementations)]
+#[derive(Debug)]
 pub struct ContextGuard {
-    previous_cx: Option<Context>,
-    // ensure this type is !Send as it relies on thread locals
+    // The position of the context in the stack. This is used to pop the context.
+    cx_pos: u16,
+    // Ensure this type is !Send as it relies on thread locals
     _marker: PhantomData<*const ()>,
 }
 
 impl Drop for ContextGuard {
     fn drop(&mut self) {
-        if let Some(previous_cx) = self.previous_cx.take() {
-            let _ = CURRENT_CONTEXT.try_with(|current| current.replace(previous_cx));
+        let id = self.cx_pos;
+        if id > ContextStack::BASE_POS && id < ContextStack::MAX_POS {
+            CURRENT_CONTEXT.with(|context_stack| context_stack.borrow_mut().pop_id(id));
         }
     }
 }
@@ -381,17 +382,116 @@ impl Hasher for IdHasher {
     }
 }
 
+/// A stack for keeping track of the [`Context`] instances that have been attached
+/// to a thread.
+///
+/// The stack allows for popping of contexts by position, which is used to do out
+/// of order dropping of [`ContextGuard`] instances. Only when the top of the
+/// stack is popped, the topmost [`Context`] is actually restored.
+///
+/// The stack relies on the fact that it is thread local and that the
+/// [`ContextGuard`] instances that are constructed using ids from it can't be
+/// moved to other threads. That means that the ids are always valid and that
+/// they are always within the bounds of the stack.
+struct ContextStack {
+    /// This is the current [`Context`] that is active on this thread, and the top
+    /// of the [`ContextStack`]. It is always present, and if the `stack` is empty
+    /// it's an empty [`Context`].
+    ///
+    /// Having this here allows for fast access to the current [`Context`].
+    current_cx: Context,
+    /// A `stack` of the other contexts that have been attached to the thread.
+    stack: Vec<Option<Context>>,
+    /// Ensure this type is !Send as it relies on thread locals
+    _marker: PhantomData<*const ()>,
+}
+
+impl ContextStack {
+    const BASE_POS: u16 = 0;
+    const MAX_POS: u16 = u16::MAX;
+    const INITIAL_CAPACITY: usize = 8;
+
+    #[inline(always)]
+    fn push(&mut self, cx: Context) -> u16 {
+        // The next id is the length of the `stack`, plus one since we have the
+        // top of the [`ContextStack`] as the `current_cx`.
+        let next_id = self.stack.len() + 1;
+        if next_id < ContextStack::MAX_POS.into() {
+            let current_cx = std::mem::replace(&mut self.current_cx, cx);
+            self.stack.push(Some(current_cx));
+            next_id as u16
+        } else {
+            // This is an overflow, log it and ignore it.
+            otel_warn!(
+                name: "Context.AttachFailed",
+                message = format!("Too many contexts. Max limit is {}. \
+                  Context::current() remains unchanged as this attach failed. \
+                  Dropping the returned ContextGuard will have no impact on Context::current().",
+                  ContextStack::MAX_POS)
+            );
+            ContextStack::MAX_POS
+        }
+    }
+
+    #[inline(always)]
+    fn pop_id(&mut self, pos: u16) {
+        if pos == ContextStack::BASE_POS || pos == ContextStack::MAX_POS {
+            // The empty context is always at the bottom of the [`ContextStack`]
+            // and cannot be popped, and the overflow position is invalid, so do
+            // nothing.
+            return;
+        }
+        let len: u16 = self.stack.len() as u16;
+        // Are we at the top of the [`ContextStack`]?
+        if pos == len {
+            // Shrink the stack if possible to clear out any out of order pops.
+            while let Some(None) = self.stack.last() {
+                _ = self.stack.pop();
+            }
+            // Restore the previous context. This will always happen since the
+            // empty context is always at the bottom of the stack if the
+            // [`ContextStack`] is not empty.
+            if let Some(Some(next_cx)) = self.stack.pop() {
+                self.current_cx = next_cx;
+            }
+        } else {
+            // This is an out of order pop.
+            if pos >= len {
+                // This is an invalid id, ignore it.
+                return;
+            }
+            // Clear out the entry at the given id.
+            _ = self.stack[pos as usize].take();
+        }
+    }
+
+    #[inline(always)]
+    fn map_current_cx<T>(&self, f: impl FnOnce(&Context) -> T) -> T {
+        f(&self.current_cx)
+    }
+}
+
+impl Default for ContextStack {
+    fn default() -> Self {
+        ContextStack {
+            current_cx: Context::default(),
+            stack: Vec::with_capacity(ContextStack::INITIAL_CAPACITY),
+            _marker: PhantomData,
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
 
+    #[derive(Debug, PartialEq)]
+    struct ValueA(u64);
+    #[derive(Debug, PartialEq)]
+    struct ValueB(u64);
+
     #[test]
     fn context_immutable() {
-        #[derive(Debug, PartialEq)]
-        struct ValueA(u64);
-        #[derive(Debug, PartialEq)]
-        struct ValueB(u64);
-
         // start with Current, which should be an empty context
         let cx = Context::current();
         assert_eq!(cx.get::<ValueA>(), None);
@@ -424,66 +524,56 @@ mod tests {
 
     #[test]
     fn nested_contexts() {
-        #[derive(Debug, PartialEq)]
-        struct ValueA(&'static str);
-        #[derive(Debug, PartialEq)]
-        struct ValueB(u64);
-        let _outer_guard = Context::new().with_value(ValueA("a")).attach();
+        let _outer_guard = Context::new().with_value(ValueA(1)).attach();
 
         // Only value `a` is set
         let current = Context::current();
-        assert_eq!(current.get(), Some(&ValueA("a")));
+        assert_eq!(current.get(), Some(&ValueA(1)));
         assert_eq!(current.get::<ValueB>(), None);
 
         {
             let _inner_guard = Context::current_with_value(ValueB(42)).attach();
             // Both values are set in inner context
             let current = Context::current();
-            assert_eq!(current.get(), Some(&ValueA("a")));
+            assert_eq!(current.get(), Some(&ValueA(1)));
             assert_eq!(current.get(), Some(&ValueB(42)));
 
             assert!(Context::map_current(|cx| {
-                assert_eq!(cx.get(), Some(&ValueA("a")));
+                assert_eq!(cx.get(), Some(&ValueA(1)));
                 assert_eq!(cx.get(), Some(&ValueB(42)));
                 true
             }));
         }
 
         // Resets to only value `a` when inner guard is dropped
         let current = Context::current();
-        assert_eq!(current.get(), Some(&ValueA("a")));
+        assert_eq!(current.get(), Some(&ValueA(1)));
         assert_eq!(current.get::<ValueB>(), None);
 
         assert!(Context::map_current(|cx| {
-            assert_eq!(cx.get(), Some(&ValueA("a")));
+            assert_eq!(cx.get(), Some(&ValueA(1)));
             assert_eq!(cx.get::<ValueB>(), None);
             true
         }));
     }
 
     #[test]
-    #[ignore = "overlapping contexts are not supported yet"]
     fn overlapping_contexts() {
-        #[derive(Debug, PartialEq)]
-        struct ValueA(&'static str);
-        #[derive(Debug, PartialEq)]
-        struct ValueB(u64);
-
-        let outer_guard = Context::new().with_value(ValueA("a")).attach();
+        let outer_guard = Context::new().with_value(ValueA(1)).attach();
 
         // Only value `a` is set
         let current = Context::current();
-        assert_eq!(current.get(), Some(&ValueA("a")));
+        assert_eq!(current.get(), Some(&ValueA(1)));
         assert_eq!(current.get::<ValueB>(), None);
 
         let inner_guard = Context::current_with_value(ValueB(42)).attach();
         // Both values are set in inner context
         let current = Context::current();
-        assert_eq!(current.get(), Some(&ValueA("a")));
+        assert_eq!(current.get(), Some(&ValueA(1)));
         assert_eq!(current.get(), Some(&ValueB(42)));
 
         assert!(Context::map_current(|cx| {
-            assert_eq!(cx.get(), Some(&ValueA("a")));
+            assert_eq!(cx.get(), Some(&ValueA(1)));
             assert_eq!(cx.get(), Some(&ValueB(42)));
             true
         }));
@@ -492,7 +582,7 @@ mod tests {
 
         // `inner_guard` is still alive so both `ValueA` and `ValueB` should still be accessible
         let current = Context::current();
-        assert_eq!(current.get(), Some(&ValueA("a")));
+        assert_eq!(current.get(), Some(&ValueA(1)));
         assert_eq!(current.get(), Some(&ValueB(42)));
 
         drop(inner_guard);
@@ -502,4 +592,60 @@ mod tests {
         assert_eq!(current.get::<ValueA>(), None);
         assert_eq!(current.get::<ValueB>(), None);
     }
+
+    #[test]
+    fn too_many_contexts() {
+        let mut guards: Vec<ContextGuard> = Vec::with_capacity(ContextStack::MAX_POS as usize);
+        let stack_max_pos = ContextStack::MAX_POS as u64;
+        // Fill the stack up until the last position
+        for i in 1..stack_max_pos {
+            let cx_guard = Context::current().with_value(ValueB(i)).attach();
+            assert_eq!(Context::current().get(), Some(&ValueB(i)));
+            assert_eq!(cx_guard.cx_pos, i as u16);
+            guards.push(cx_guard);
+        }
+        // Let's overflow the stack a couple of times
+        for _ in 0..16 {
+            let cx_guard = Context::current().with_value(ValueA(1)).attach();
+            assert_eq!(cx_guard.cx_pos, ContextStack::MAX_POS);
+            assert_eq!(Context::current().get::<ValueA>(), None);
+            assert_eq!(Context::current().get(), Some(&ValueB(stack_max_pos - 1)));
+            guards.push(cx_guard);
+        }
+        // Drop the overflow contexts
+        for _ in 0..16 {
+            guards.pop();
+            assert_eq!(Context::current().get::<ValueA>(), None);
+            assert_eq!(Context::current().get(), Some(&ValueB(stack_max_pos - 1)));
+        }
+        // Drop one more so we can add a new one
+        guards.pop();
+        assert_eq!(Context::current().get::<ValueA>(), None);
+        assert_eq!(Context::current().get(), Some(&ValueB(stack_max_pos - 2)));
+        // Push a new context and see that it works
+        let cx_guard = Context::current().with_value(ValueA(2)).attach();
+        assert_eq!(cx_guard.cx_pos, ContextStack::MAX_POS - 1);
+        assert_eq!(Context::current().get(), Some(&ValueA(2)));
+        assert_eq!(Context::current().get(), Some(&ValueB(stack_max_pos - 2)));
+        guards.push(cx_guard);
+        // Let's overflow the stack a couple of times again
+        for _ in 0..16 {
+            let cx_guard = Context::current().with_value(ValueA(1)).attach();
+            assert_eq!(cx_guard.cx_pos, ContextStack::MAX_POS);
+            assert_eq!(Context::current().get(), Some(&ValueA(2)));
+            assert_eq!(Context::current().get(), Some(&ValueB(stack_max_pos - 2)));
+            guards.push(cx_guard);
+        }
+    }
+
+    #[test]
+    fn context_stack_pop_id() {
+        // This is to get full line coverage of the `pop_id` function.
+        // In real life the `Drop` implementation of `ContextGuard` ensures that
+        // the ids are valid and inside the bounds.
+        let mut stack = ContextStack::default();
+        stack.pop_id(ContextStack::BASE_POS);
+        stack.pop_id(ContextStack::MAX_POS);
+        stack.pop_id(4711);
+    }
 }