Skip to content

Commit 9639d8e

Browse files
committed
add the "provisional cache"
1 parent a1a8a7b commit 9639d8e

File tree

1 file changed

+192
-4
lines changed

1 file changed

+192
-4
lines changed

src/librustc/traits/select.rs

Lines changed: 192 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ use crate::hir;
4343
use rustc_data_structures::bit_set::GrowableBitSet;
4444
use rustc_data_structures::sync::Lock;
4545
use rustc_target::spec::abi::Abi;
46-
use std::cell::Cell;
46+
use std::cell::{Cell, RefCell};
4747
use std::cmp;
4848
use std::fmt::{self, Display};
4949
use std::iter;
@@ -191,7 +191,6 @@ struct TraitObligationStack<'prev, 'tcx: 'prev> {
191191

192192
/// Depth-first number of this node in the search graph -- a
193193
/// pre-order index. Basically a freshly incremented counter.
194-
#[allow(dead_code)] // TODO
195194
dfn: usize,
196195
}
197196

@@ -880,6 +879,12 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> {
880879
return Ok(result);
881880
}
882881

882+
if let Some(result) = stack.cache().get_provisional(fresh_trait_ref) {
883+
debug!("PROVISIONAL CACHE HIT: EVAL({:?})={:?}", fresh_trait_ref, result);
884+
stack.update_reached_depth(stack.cache().current_reached_depth());
885+
return Ok(result);
886+
}
887+
883888
// Check if this is a match for something already on the
884889
// stack. If so, we don't want to insert the result into the
885890
// main cache (it is cycle dependent) nor the provisional
@@ -892,20 +897,42 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> {
892897
let (result, dep_node) = self.in_task(|this| this.evaluate_stack(&stack));
893898
let result = result?;
894899

900+
if !result.must_apply_modulo_regions() {
901+
stack.cache().on_failure(stack.dfn);
902+
}
903+
895904
let reached_depth = stack.reached_depth.get();
896905
if reached_depth >= stack.depth {
897906
debug!("CACHE MISS: EVAL({:?})={:?}", fresh_trait_ref, result);
898907
self.insert_evaluation_cache(obligation.param_env, fresh_trait_ref, dep_node, result);
908+
909+
stack.cache().on_completion(stack.depth, |fresh_trait_ref, provisional_result| {
910+
self.insert_evaluation_cache(
911+
obligation.param_env,
912+
fresh_trait_ref,
913+
dep_node,
914+
provisional_result.max(result),
915+
);
916+
});
899917
} else {
918+
debug!("PROVISIONAL: {:?}={:?}", fresh_trait_ref, result);
900919
debug!(
901-
"evaluate_trait_predicate_recursively: skipping cache because {:?} \
920+
"evaluate_trait_predicate_recursively: caching provisionally because {:?} \
902921
is a cycle participant (at depth {}, reached depth {})",
903922
fresh_trait_ref,
904923
stack.depth,
905924
reached_depth,
906925
);
926+
927+
stack.cache().insert_provisional(
928+
stack.dfn,
929+
reached_depth,
930+
fresh_trait_ref,
931+
result,
932+
);
907933
}
908934

935+
909936
Ok(result)
910937
}
911938

@@ -4004,18 +4031,179 @@ impl<'o, 'tcx> TraitObligationStack<'o, 'tcx> {
40044031
}
40054032
}
40064033

4034+
/// The "provisional evaluation cache" is used to store intermediate cache results
4035+
/// when solving auto traits. Auto traits are unusual in that they can support
4036+
/// cycles. So, for example, a "proof tree" like this would be ok:
4037+
///
4038+
/// - `Foo<T>: Send` :-
4039+
/// - `Bar<T>: Send` :-
4040+
/// - `Foo<T>: Send` -- cycle, but ok
4041+
/// - `Baz<T>: Send`
4042+
///
4043+
/// Here, to prove `Foo<T>: Send`, we have to prove `Bar<T>: Send` and
4044+
/// `Baz<T>: Send`. Proving `Bar<T>: Send` in turn required `Foo<T>: Send`.
4045+
/// For non-auto traits, this cycle would be an error, but for auto traits (because
4046+
/// they are coinductive) it is considered ok.
4047+
///
4048+
/// However, there is a complication: at the point where we have
4049+
/// "proven" `Bar<T>: Send`, we have in fact only proven it
4050+
/// *provisionally*. In particular, we proved that `Bar<T>: Send`
4051+
/// *under the assumption* that `Foo<T>: Send`. But what if we later
4052+
/// find out this assumption is wrong? Specifically, we could
4053+
/// encounter some kind of error proving `Baz<T>: Send`. In that case,
4054+
/// `Bar<T>: Send` didn't turn out to be true.
4055+
///
4056+
/// In Issue #60010, we found a bug in rustc where it would cache
4057+
/// these intermediate results. This was fixed in #60444 by disabling
4058+
/// *all* caching for things involved in a cycle -- in our example,
4059+
/// that would mean we don't cache that `Bar<T>: Send`. But this led
4060+
/// to large slowdowns.
4061+
///
4062+
/// Specifically, imagine this scenario, where proving `Baz<T>: Send`
4063+
/// first requires proving `Bar<T>: Send` (which is true):
4064+
///
4065+
/// - `Foo<T>: Send` :-
4066+
/// - `Bar<T>: Send` :-
4067+
/// - `Foo<T>: Send` -- cycle, but ok
4068+
/// - `Baz<T>: Send`
4069+
/// - `Bar<T>: Send` -- would be nice for this to be a cache hit!
4070+
/// - `*const T: Send` -- but what if we later encounter an error?
4071+
///
4072+
/// The *provisional evaluation cache* resolves this issue. It stores
4073+
/// cache results that we've proven but which were involved in a cycle
4074+
/// in some way. We track the minimal stack depth (i.e., the
4075+
/// farthest from the top of the stack) that we are dependent on.
4076+
/// The idea is that the cache results within are all valid -- so long as
4077+
/// none of the nodes in between the current node and the node at that minimum
4078+
/// depth result in an error (in which case the cached results are just thrown away).
4079+
///
4080+
/// During evaluation, we consult this provisional cache and rely on
4081+
/// it. Accessing a cached value is considered equivalent to accessing
4082+
/// a result at `reached_depth`, so it marks the *current* solution as
4083+
/// provisional as well. If an error is encountered, we toss out any
4084+
/// provisional results added from the subtree that encountered the
4085+
/// error. When we pop the node at `reached_depth` from the stack, we
4086+
/// can commit all the things that remain in the provisional cache.
40074087
#[derive(Default)]
40084088
struct ProvisionalEvaluationCache<'tcx> {
4089+
/// Next "depth first number" (DFN) to issue -- just a counter.
40094090
dfn: Cell<usize>,
4010-
_dummy: Vec<&'tcx ()>,
4091+
4092+
/// Stores the "coldest" depth (bottom of stack) reached by any of
4093+
/// the evaluation entries. The idea here is that all things in the provisional
4094+
/// cache are always dependent on *something* that is colder in the stack:
4095+
/// therefore, if we add a new entry that is dependent on something *colder still*,
4096+
/// we have to modify the depth for all entries at once.
4097+
///
4098+
/// Example:
4099+
///
4100+
/// Imagine we have a stack `A B C D E` (with `E` being the top of
4101+
/// the stack). We cache something with depth 2, which means that
4102+
/// it was dependent on C. Then we pop E but go on and process a
4103+
/// new node F: A B C D F. Now F adds something to the cache with
4104+
/// depth 1, meaning it is dependent on B. Our original cache
4105+
/// entry is also dependent on B, because there is a path from E
4106+
/// to C and then from C to F and from F to B.
///
/// NOTE(review): `#[derive(Default)]` initializes this cell to 0, which is
/// already the coldest possible depth, so the `min` taken in
/// `insert_provisional` cannot lower it further -- confirm that this
/// conservative starting value is intended.
4107+
reached_depth: Cell<usize>,
4108+
4109+
/// Map from cache key to the provisionally evaluated thing.
4110+
/// The cache entries contain the result but also the DFN in which they
4111+
/// were added. The DFN is used to clear out values on failure.
4112+
///
4113+
/// Imagine we have a stack like:
4114+
///
4115+
/// - `A B C` and we add a cache for the result of C (DFN 2)
4116+
/// - Then we have a stack `A B D` where `D` has DFN 3
4117+
/// - We try to solve D by evaluating E: `A B D E` (DFN 4)
4118+
/// - `E` generates various cache entries which have cyclic dependencies on `B`
4119+
/// - `A B D E F` and so forth
4120+
/// - the DFN of `F` for example would be 5
4121+
/// - then we determine that `E` is in error -- we will then clear
4122+
/// all cache values whose DFN is >= 4 -- in this case, that
4123+
/// means the cached value for `F`.
4124+
map: RefCell<FxHashMap<ty::PolyTraitRef<'tcx>, ProvisionalEvaluation>>,
4125+
}
4126+
4127+
/// A cache value for the provisional cache: contains the depth-first
4128+
/// number (DFN) and result.
4129+
#[derive(Copy, Clone)]
4130+
struct ProvisionalEvaluation {
4131+
/// DFN of the node that inserted this entry (the `from_dfn` argument of
/// `insert_provisional`); `on_failure` compares it against the DFN of the
/// failing node.
from_dfn: usize,
4132+
/// The provisionally computed result, handed back by `get_provisional`
/// and passed to the `on_completion` callback.
result: EvaluationResult,
40114133
}
40124134

40134135
impl<'tcx> ProvisionalEvaluationCache<'tcx> {
4136+
/// Get the next DFN in sequence (basically a counter).
40144137
fn next_dfn(&self) -> usize {
40154138
let result = self.dfn.get();
40164139
self.dfn.set(result + 1);
40174140
result
40184141
}
4142+
4143+
/// Check the provisional cache for any result for
4144+
/// `fresh_trait_ref`. If there is a hit, then you must consider
4145+
/// it an access to the stack slots at depth
4146+
/// `self.current_reached_depth()` and above.
4147+
fn get_provisional(&self, fresh_trait_ref: ty::PolyTraitRef<'tcx>) -> Option<EvaluationResult> {
4148+
Some(self.map.borrow().get(&fresh_trait_ref)?.result)
4149+
}
4150+
4151+
/// Current value of the `reached_depth` counter -- all the
4152+
/// provisional cache entries are dependent on the item at this
4153+
/// depth.
4154+
fn current_reached_depth(&self) -> usize {
4155+
self.reached_depth.get()
4156+
}
4157+
4158+
/// Insert a provisional result into the cache. The result came
4159+
/// from the node with the given DFN. It accessed a minimum depth
4160+
/// of `reached_depth` to compute. It evaluated `fresh_trait_ref`
4161+
/// and resulted in `result`.
4162+
fn insert_provisional(
4163+
&self,
4164+
from_dfn: usize,
4165+
reached_depth: usize,
4166+
fresh_trait_ref: ty::PolyTraitRef<'tcx>,
4167+
result: EvaluationResult,
4168+
) {
4169+
let r_d = self.reached_depth.get();
4170+
self.reached_depth.set(r_d.min(reached_depth));
4171+
4172+
self.map.borrow_mut().insert(fresh_trait_ref, ProvisionalEvaluation { from_dfn, result });
4173+
}
4174+
4175+
/// Invoked when the node with dfn `dfn` does not get a successful
4176+
/// result. This will clear out any provisional cache entries
4177+
/// that were added since `dfn` was created. This is because the
4178+
/// provisional entries are things which must assume that the
4179+
/// things on the stack at the time of their creation succeeded --
4180+
/// since the failing node is presently at the top of the stack,
4181+
/// these provisional entries must either depend on it or some
4182+
/// ancestor of it.
4183+
fn on_failure(&self, dfn: usize) {
4184+
self.map.borrow_mut().retain(|_key, eval| eval.from_dfn >= dfn)
4185+
}
4186+
4187+
/// Invoked when the node at depth `depth` completed without
4188+
/// depending on anything higher in the stack (if that completion
4189+
/// was a failure, then `on_failure` should have been invoked
4190+
/// already). The callback `op` will be invoked for each
4191+
/// provisional entry that we can now confirm.
4192+
fn on_completion(
4193+
&self,
4194+
depth: usize,
4195+
mut op: impl FnMut(ty::PolyTraitRef<'tcx>, EvaluationResult),
4196+
) {
4197+
if self.reached_depth.get() < depth {
4198+
return;
4199+
}
4200+
4201+
for (fresh_trait_ref, eval) in self.map.borrow_mut().drain() {
4202+
op(fresh_trait_ref, eval.result);
4203+
}
4204+
4205+
self.reached_depth.set(depth);
4206+
}
40194207
}
40204208

40214209
#[derive(Copy, Clone)]

0 commit comments

Comments
 (0)