@@ -43,7 +43,7 @@ use crate::hir;
43
43
use rustc_data_structures:: bit_set:: GrowableBitSet ;
44
44
use rustc_data_structures:: sync:: Lock ;
45
45
use rustc_target:: spec:: abi:: Abi ;
46
- use std:: cell:: Cell ;
46
+ use std:: cell:: { Cell , RefCell } ;
47
47
use std:: cmp;
48
48
use std:: fmt:: { self , Display } ;
49
49
use std:: iter;
@@ -191,7 +191,6 @@ struct TraitObligationStack<'prev, 'tcx: 'prev> {
191
191
192
192
/// Depth-first number of this node in the search graph -- a
193
193
/// pre-order index. Basically a freshly incremented counter.
194
- #[ allow( dead_code) ] // TODO
195
194
dfn : usize ,
196
195
}
197
196
@@ -880,6 +879,12 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> {
880
879
return Ok ( result) ;
881
880
}
882
881
882
+ if let Some ( result) = stack. cache ( ) . get_provisional ( fresh_trait_ref) {
883
+ debug ! ( "PROVISIONAL CACHE HIT: EVAL({:?})={:?}" , fresh_trait_ref, result) ;
884
+ stack. update_reached_depth ( stack. cache ( ) . current_reached_depth ( ) ) ;
885
+ return Ok ( result) ;
886
+ }
887
+
883
888
// Check if this is a match for something already on the
884
889
// stack. If so, we don't want to insert the result into the
885
890
// main cache (it is cycle dependent) nor the provisional
@@ -892,20 +897,42 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> {
892
897
let ( result, dep_node) = self . in_task ( |this| this. evaluate_stack ( & stack) ) ;
893
898
let result = result?;
894
899
900
+ if !result. must_apply_modulo_regions ( ) {
901
+ stack. cache ( ) . on_failure ( stack. dfn ) ;
902
+ }
903
+
895
904
let reached_depth = stack. reached_depth . get ( ) ;
896
905
if reached_depth >= stack. depth {
897
906
debug ! ( "CACHE MISS: EVAL({:?})={:?}" , fresh_trait_ref, result) ;
898
907
self . insert_evaluation_cache ( obligation. param_env , fresh_trait_ref, dep_node, result) ;
908
+
909
+ stack. cache ( ) . on_completion ( stack. depth , |fresh_trait_ref, provisional_result| {
910
+ self . insert_evaluation_cache (
911
+ obligation. param_env ,
912
+ fresh_trait_ref,
913
+ dep_node,
914
+ provisional_result. max ( result) ,
915
+ ) ;
916
+ } ) ;
899
917
} else {
918
+ debug ! ( "PROVISIONAL: {:?}={:?}" , fresh_trait_ref, result) ;
900
919
debug ! (
901
- "evaluate_trait_predicate_recursively: skipping cache because {:?} \
920
+ "evaluate_trait_predicate_recursively: caching provisionally because {:?} \
902
921
is a cycle participant (at depth {}, reached depth {})",
903
922
fresh_trait_ref,
904
923
stack. depth,
905
924
reached_depth,
906
925
) ;
926
+
927
+ stack. cache ( ) . insert_provisional (
928
+ stack. dfn ,
929
+ reached_depth,
930
+ fresh_trait_ref,
931
+ result,
932
+ ) ;
907
933
}
908
934
935
+
909
936
Ok ( result)
910
937
}
911
938
@@ -4004,18 +4031,179 @@ impl<'o, 'tcx> TraitObligationStack<'o, 'tcx> {
4004
4031
}
4005
4032
}
4006
4033
4034
+ /// The "provisional evaluation cache" is used to store intermediate cache results
4035
+ /// when solving auto traits. Auto traits are unusual in that they can support
4036
+ /// cycles. So, for example, a "proof tree" like this would be ok:
4037
+ ///
4038
+ /// - `Foo<T>: Send` :-
4039
+ /// - `Bar<T>: Send` :-
4040
+ /// - `Foo<T>: Send` -- cycle, but ok
4041
+ /// - `Baz<T>: Send`
4042
+ ///
4043
+ /// Here, to prove `Foo<T>: Send`, we have to prove `Bar<T>: Send` and
4044
+ /// `Baz<T>: Send`. Proving `Bar<T>: Send` in turn required `Foo<T>: Send`.
4045
+ /// For non-auto traits, this cycle would be an error, but for auto traits (because
4046
+ /// they are coinductive) it is considered ok.
4047
+ ///
4048
+ /// However, there is a complication: at the point where we have
4049
+ /// "proven" `Bar<T>: Send`, we have in fact only proven it
4050
+ /// *provisionally*. In particular, we proved that `Bar<T>: Send`
4051
+ /// *under the assumption* that `Foo<T>: Send`. But what if we later
4052
+ /// find out this assumption is wrong? Specifically, we could
4053
+ /// encounter some kind of error proving `Baz<T>: Send`. In that case,
4054
+ /// `Bar<T>: Send` didn't turn out to be true.
4055
+ ///
4056
+ /// In Issue #60010, we found a bug in rustc where it would cache
4057
+ /// these intermediate results. This was fixed in #60444 by disabling
4058
+ /// *all* caching for things involved in a cycle -- in our example,
4059
+ /// that would mean we don't cache that `Bar<T>: Send`. But this led
4060
+ /// to large slowdowns.
4061
+ ///
4062
+ /// Specifically, imagine this scenario, where proving `Baz<T>: Send`
4063
+ /// first requires proving `Bar<T>: Send` (which is true):
4064
+ ///
4065
+ /// - `Foo<T>: Send` :-
4066
+ /// - `Bar<T>: Send` :-
4067
+ /// - `Foo<T>: Send` -- cycle, but ok
4068
+ /// - `Baz<T>: Send`
4069
+ /// - `Bar<T>: Send` -- would be nice for this to be a cache hit!
4070
+ /// - `*const T: Send` -- but what if we later encounter an error?
4071
+ ///
4072
+ /// The *provisional evaluation cache* resolves this issue. It stores
4073
+ /// cache results that we've proven but which were involved in a cycle
4074
+ /// in some way. We track the minimal stack depth (i.e., the
4075
+ /// farthest from the top of the stack) that we are dependent on.
4076
+ /// The idea is that the cache results within are all valid -- so long as
4077
+ /// none of the nodes in between the current node and the node at that minimum
4078
+ /// depth result in an error (in which case the cached results are just thrown away).
4079
+ ///
4080
+ /// During evaluation, we consult this provisional cache and rely on
4081
+ /// it. Accessing a cached value is considered equivalent to accessing
4082
+ /// a result at `reached_depth`, so it marks the *current* solution as
4083
+ /// provisional as well. If an error is encountered, we toss out any
4084
+ /// provisional results added from the subtree that encountered the
4085
+ /// error. When we pop the node at `reached_depth` from the stack, we
4086
+ /// can commit all the things that remain in the provisional cache.
4007
4087
#[ derive( Default ) ]
4008
4088
struct ProvisionalEvaluationCache < ' tcx > {
4089
+ /// next "depth first number" to issue -- just a counter
4009
4090
dfn : Cell < usize > ,
4010
- _dummy : Vec < & ' tcx ( ) > ,
4091
+
4092
+ /// Stores the "coldest" depth (bottom of stack) reached by any of
4093
+ /// the evaluation entries. The idea here is that all things in the provisional
4094
+ /// cache are always dependent on *something* that is colder in the stack:
4095
+ /// therefore, if we add a new entry that is dependent on something *colder still*,
4096
+ /// we have to modify the depth for all entries at once.
4097
+ ///
4098
+ /// Example:
4099
+ ///
4100
+ /// Imagine we have a stack `A B C D E` (with `E` being the top of
4101
+ /// the stack). We cache something with depth 2, which means that
4102
+ /// it was dependent on C. Then we pop E but go on and process a
4103
+ /// new node F: A B C D F. Now F adds something to the cache with
4104
+ /// depth 1, meaning it is dependent on B. Our original cache
4105
+ /// entry is also dependent on B, because there is a path from E
4106
+ /// to C and then from C to F and from F to B.
4107
+ reached_depth : Cell < usize > ,
4108
+
4109
+ /// Map from cache key to the provisionally evaluated thing.
4110
+ /// The cache entries contain the result but also the DFN in which they
4111
+ /// were added. The DFN is used to clear out values on failure.
4112
+ ///
4113
+ /// Imagine we have a stack like:
4114
+ ///
4115
+ /// - `A B C` and we add a cache for the result of C (DFN 2)
4116
+ /// - Then we have a stack `A B D` where `D` has DFN 3
4117
+ /// - We try to solve D by evaluating E: `A B D E` (DFN 4)
4118
+ /// - `E` generates various cache entries which have cyclic dependices on `B`
4119
+ /// - `A B D E F` and so forth
4120
+ /// - the DFN of `F` for example would be 5
4121
+ /// - then we determine that `E` is in error -- we will then clear
4122
+ /// all cache values whose DFN is >= 4 -- in this case, that
4123
+ /// means the cached value for `F`.
4124
+ map : RefCell < FxHashMap < ty:: PolyTraitRef < ' tcx > , ProvisionalEvaluation > > ,
4125
+ }
4126
+
4127
+ /// A cache value for the provisional cache: contains the depth-first
4128
+ /// number (DFN) and result.
4129
+ #[ derive( Copy , Clone ) ]
4130
+ struct ProvisionalEvaluation {
4131
+ from_dfn : usize ,
4132
+ result : EvaluationResult ,
4011
4133
}
4012
4134
4013
4135
impl < ' tcx > ProvisionalEvaluationCache < ' tcx > {
4136
+ /// Get the next DFN in sequence (basically a counter).
4014
4137
fn next_dfn ( & self ) -> usize {
4015
4138
let result = self . dfn . get ( ) ;
4016
4139
self . dfn . set ( result + 1 ) ;
4017
4140
result
4018
4141
}
4142
+
4143
+ /// Check the provisional cache for any result for
4144
+ /// `fresh_trait_ref`. If there is a hit, then you must consider
4145
+ /// it an access to the stack slots at depth
4146
+ /// `self.current_reached_depth()` and above.
4147
+ fn get_provisional ( & self , fresh_trait_ref : ty:: PolyTraitRef < ' tcx > ) -> Option < EvaluationResult > {
4148
+ Some ( self . map . borrow ( ) . get ( & fresh_trait_ref) ?. result )
4149
+ }
4150
+
4151
+ /// Current value of the `reached_depth` counter -- all the
4152
+ /// provisional cache entries are dependent on the item at this
4153
+ /// depth.
4154
+ fn current_reached_depth ( & self ) -> usize {
4155
+ self . reached_depth . get ( )
4156
+ }
4157
+
4158
+ /// Insert a provisional result into the cache. The result came
4159
+ /// from the node with the given DFN. It accessed a minimum depth
4160
+ /// of `reached_depth` to compute. It evaluated `fresh_trait_ref`
4161
+ /// and resulted in `result`.
4162
+ fn insert_provisional (
4163
+ & self ,
4164
+ from_dfn : usize ,
4165
+ reached_depth : usize ,
4166
+ fresh_trait_ref : ty:: PolyTraitRef < ' tcx > ,
4167
+ result : EvaluationResult ,
4168
+ ) {
4169
+ let r_d = self . reached_depth . get ( ) ;
4170
+ self . reached_depth . set ( r_d. min ( reached_depth) ) ;
4171
+
4172
+ self . map . borrow_mut ( ) . insert ( fresh_trait_ref, ProvisionalEvaluation { from_dfn, result } ) ;
4173
+ }
4174
+
4175
+ /// Invoked when the node with dfn `dfn` does not get a successful
4176
+ /// result. This will clear out any provisional cache entries
4177
+ /// that were added since `dfn` was created. This is because the
4178
+ /// provisional entries are things which must assume that the
4179
+ /// things on the stack at the time of their creation succeeded --
4180
+ /// since the failing node is presently at the top of the stack,
4181
+ /// these provisional entries must either depend on it or some
4182
+ /// ancestor of it.
4183
+ fn on_failure ( & self , dfn : usize ) {
4184
+ self . map . borrow_mut ( ) . retain ( |_key, eval| eval. from_dfn >= dfn)
4185
+ }
4186
+
4187
+ /// Invoked when the node at depth `depth` completed without
4188
+ /// depending on anything higher in the stack (if that completion
4189
+ /// was a failure, then `on_failure` should have been invoked
4190
+ /// already). The callback `op` will be invoked for each
4191
+ /// provisional entry that we can now confirm.
4192
+ fn on_completion (
4193
+ & self ,
4194
+ depth : usize ,
4195
+ mut op : impl FnMut ( ty:: PolyTraitRef < ' tcx > , EvaluationResult ) ,
4196
+ ) {
4197
+ if self . reached_depth . get ( ) < depth {
4198
+ return ;
4199
+ }
4200
+
4201
+ for ( fresh_trait_ref, eval) in self . map . borrow_mut ( ) . drain ( ) {
4202
+ op ( fresh_trait_ref, eval. result ) ;
4203
+ }
4204
+
4205
+ self . reached_depth . set ( depth) ;
4206
+ }
4019
4207
}
4020
4208
4021
4209
#[ derive( Copy , Clone ) ]
0 commit comments