Skip to content

Commit 4ec278f

Browse files
committed
Globally optimize traversal in resolve
Currently when we're attempting to resolve a dependency graph we locally optimize the order in which we visit candidates for a resolution (most constrained first). Once a version is activated, however, it will add a whole mess of new dependencies that need to be activated to the global list, currently appended at the end. This unfortunately can lead to pathological behavior. By always popping from the back and appending to the back of pending dependencies, super constrained dependencies in the front end up not getting visited for quite awhile. This in turn can cause Cargo to appear to hang for quite awhile as it's so aggressively backtracking. This commit switches the list of dependencies-to-activate from a `Vec` to a `BinaryHeap`. The heap is sorted by the number of candidates for each dependency, with the least candidates first. This ends up massively cutting down on resolution times in practice whenever `=` dependencies are encountered because they are resolved almost immediately instead of way near the end if they're at the wrong place in the graph. This alteration in traversal order ended up messing up the existing cycle detection, so that was just removed entirely from resolution and moved to its own dedicated pass. Closes #2090
1 parent 5cb45a9 commit 4ec278f

File tree

1 file changed

+117
-26
lines changed

1 file changed

+117
-26
lines changed

src/cargo/core/resolver/mod.rs

Lines changed: 117 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,12 @@
4646
//! that we're implementing something that probably shouldn't be allocating all
4747
//! over the place.
4848
49-
use std::collections::HashSet;
50-
use std::collections::hash_map::HashMap;
49+
use std::cmp::Ordering;
50+
use std::collections::{HashSet, HashMap, BinaryHeap};
5151
use std::fmt;
5252
use std::ops::Range;
5353
use std::rc::Rc;
54+
5455
use semver;
5556

5657
use core::{PackageId, Registry, SourceId, Summary, Dependency};
@@ -142,7 +143,6 @@ impl fmt::Debug for Resolve {
142143
struct Context {
143144
activations: HashMap<(String, SourceId), Vec<Rc<Summary>>>,
144145
resolve: Resolve,
145-
visited: HashSet<PackageId>,
146146
}
147147

148148
/// Builds the list of all packages required to build the first argument.
@@ -154,10 +154,11 @@ pub fn resolve(summary: &Summary, method: &Method,
154154
let cx = Context {
155155
resolve: Resolve::new(summary.package_id().clone()),
156156
activations: HashMap::new(),
157-
visited: HashSet::new(),
158157
};
159158
let _p = profile::start(format!("resolving: {}", summary.package_id()));
160-
activate_deps_loop(cx, registry, summary, method)
159+
let cx = try!(activate_deps_loop(cx, registry, summary, method));
160+
try!(check_cycles(&cx));
161+
Ok(cx.resolve)
161162
}
162163

163164
/// Attempts to activate the summary `parent` in the context `cx`.
@@ -174,13 +175,9 @@ fn activate(cx: &mut Context,
174175
// Dependency graphs are required to be a DAG, so we keep a set of
175176
// packages we're visiting and bail if we hit a dupe.
176177
let id = parent.package_id().clone();
177-
if !cx.visited.insert(id.clone()) {
178-
bail!("cyclic package dependency: package `{}` depends on itself", id)
179-
}
180178

181179
// If we're already activated, then that was easy!
182180
if cx.flag_activated(&parent, method) {
183-
cx.visited.remove(&id);
184181
return Ok(None);
185182
}
186183
trace!("activating {}", parent.package_id());
@@ -207,17 +204,26 @@ impl<T> RcVecIter<T> {
207204
vec: Rc::new(vec),
208205
}
209206
}
207+
210208
fn cur_index(&self) -> usize {
211209
self.rest.start - 1
212210
}
211+
212+
fn as_slice(&self) -> &[T] {
213+
&self.vec[self.rest.start..self.rest.end]
214+
}
213215
}
216+
214217
impl<T> Iterator for RcVecIter<T> where T: Clone {
215218
type Item = (usize, T);
216219
fn next(&mut self) -> Option<(usize, T)> {
217220
self.rest.next().and_then(|i| {
218221
self.vec.get(i).map(|val| (i, val.clone()))
219222
})
220223
}
224+
fn size_hint(&self) -> (usize, Option<usize>) {
225+
self.rest.size_hint()
226+
}
221227
}
222228

223229
#[derive(Clone)]
@@ -227,9 +233,46 @@ struct DepsFrame {
227233
id: PackageId,
228234
}
229235

236+
impl DepsFrame {
237+
/// Returns the least number of candidates that any of this frame's siblings
238+
/// has.
239+
///
240+
/// The `remaining_siblings` array is already sorted with the smallest
241+
/// number of candidates at the front, so we just return the number of
242+
/// candidates in that entry.
243+
fn min_candidates(&self) -> usize {
244+
self.remaining_siblings.as_slice().get(0).map(|&(_, ref candidates, _)| {
245+
candidates.len()
246+
}).unwrap_or(0)
247+
}
248+
}
249+
250+
impl PartialEq for DepsFrame {
251+
fn eq(&self, other: &DepsFrame) -> bool {
252+
self.min_candidates() == other.min_candidates()
253+
}
254+
}
255+
256+
impl Eq for DepsFrame {}
257+
258+
impl PartialOrd for DepsFrame {
259+
fn partial_cmp(&self, other: &DepsFrame) -> Option<Ordering> {
260+
Some(self.cmp(other))
261+
}
262+
}
263+
264+
impl Ord for DepsFrame {
265+
fn cmp(&self, other: &DepsFrame) -> Ordering {
266+
// the frame with the sibling that has the least number of candidates
267+
// needs to get the bubbled up to the top of the heap we use below, so
268+
// reverse the order of the comparison here.
269+
other.min_candidates().cmp(&self.min_candidates())
270+
}
271+
}
272+
230273
struct BacktrackFrame {
231274
context_backup: Context,
232-
deps_backup: Vec<DepsFrame>,
275+
deps_backup: BinaryHeap<DepsFrame>,
233276
remaining_candidates: RcVecIter<Rc<Summary>>,
234277
parent: Rc<Summary>,
235278
dep: Dependency,
@@ -243,9 +286,16 @@ struct BacktrackFrame {
243286
fn activate_deps_loop(mut cx: Context,
244287
registry: &mut Registry,
245288
top: Rc<Summary>,
246-
top_method: &Method) -> CargoResult<Resolve> {
289+
top_method: &Method) -> CargoResult<Context> {
290+
// Note that a `BinaryHeap` is used for the remaining dependencies that need
291+
// activation. This heap is sorted such that the "largest value" is the most
292+
// constrained dependency, or the one with the least candidates.
293+
//
294+
// This helps us get through super constrained portions of the dependency
295+
// graph quickly and hopefully lock down what later larger dependencies can
296+
// use (those with more candidates).
247297
let mut backtrack_stack = Vec::new();
248-
let mut remaining_deps = Vec::new();
298+
let mut remaining_deps = BinaryHeap::new();
249299
remaining_deps.extend(try!(activate(&mut cx, registry, top, &top_method)));
250300

251301
// Main resolution loop, this is the workhorse of the resolution algorithm.
@@ -268,10 +318,7 @@ fn activate_deps_loop(mut cx: Context,
268318
remaining_deps.push(deps_frame);
269319
(parent, sibling)
270320
}
271-
None => {
272-
cx.visited.remove(&deps_frame.id);
273-
continue
274-
}
321+
None => continue,
275322
};
276323
let (mut parent, (mut cur, (mut dep, candidates, features))) = frame;
277324
assert!(!remaining_deps.is_empty());
@@ -353,32 +400,28 @@ fn activate_deps_loop(mut cx: Context,
353400
candidate.version());
354401
cx.resolve.graph.link(parent.package_id().clone(),
355402
candidate.package_id().clone());
356-
357-
// If we hit an intransitive dependency then clear out the visitation
358-
// list as we can't induce a cycle through transitive dependencies.
359-
if !dep.is_transitive() {
360-
cx.visited.clear();
361-
}
362403
remaining_deps.extend(try!(activate(&mut cx, registry,
363404
candidate, &method)));
364405
}
365406
trace!("resolved: {:?}", cx.resolve);
366-
Ok(cx.resolve)
407+
Ok(cx)
367408
}
368409

369410
// Searches up `backtrack_stack` until it finds a dependency with remaining
370411
// candidates. Resets `cx` and `remaining_deps` to that level and returns the
371412
// next candidate. If all candidates have been exhausted, returns None.
372413
fn find_candidate(backtrack_stack: &mut Vec<BacktrackFrame>,
373-
cx: &mut Context, remaining_deps: &mut Vec<DepsFrame>,
374-
parent: &mut Rc<Summary>, cur: &mut usize,
414+
cx: &mut Context,
415+
remaining_deps: &mut BinaryHeap<DepsFrame>,
416+
parent: &mut Rc<Summary>,
417+
cur: &mut usize,
375418
dep: &mut Dependency) -> Option<Rc<Summary>> {
376419
while let Some(mut frame) = backtrack_stack.pop() {
377420
if let Some((_, candidate)) = frame.remaining_candidates.next() {
378421
*cx = frame.context_backup.clone();
379422
*remaining_deps = frame.deps_backup.clone();
380423
*parent = frame.parent.clone();
381-
*cur = remaining_deps.last().unwrap().remaining_siblings.cur_index();
424+
*cur = remaining_deps.peek().unwrap().remaining_siblings.cur_index();
382425
*dep = frame.dep.clone();
383426
backtrack_stack.push(frame);
384427
return Some(candidate)
@@ -705,3 +748,51 @@ impl Context {
705748
Ok(ret)
706749
}
707750
}
751+
752+
fn check_cycles(cx: &Context) -> CargoResult<()> {
753+
let mut summaries = HashMap::new();
754+
for summary in cx.activations.values().flat_map(|v| v) {
755+
summaries.insert(summary.package_id(), &**summary);
756+
}
757+
return visit(&cx.resolve,
758+
cx.resolve.root(),
759+
&summaries,
760+
&mut HashSet::new(),
761+
&mut HashSet::new());
762+
763+
fn visit<'a>(resolve: &'a Resolve,
764+
id: &'a PackageId,
765+
summaries: &HashMap<&'a PackageId, &Summary>,
766+
visited: &mut HashSet<&'a PackageId>,
767+
checked: &mut HashSet<&'a PackageId>)
768+
-> CargoResult<()> {
769+
// See if we visited ourselves
770+
if !visited.insert(id) {
771+
bail!("cyclic package dependency: package `{}` depends on itself",
772+
id);
773+
}
774+
775+
// If we've already checked this node no need to recurse again as we'll
776+
// just conclude the same thing as last time, so we only execute the
777+
// recursive step if we successfully insert into `checked`.
778+
//
779+
// Note that if we hit an intransitive dependency then we clear out the
780+
// visitation list as we can't induce a cycle through transitive
781+
// dependencies.
782+
if checked.insert(id) {
783+
let summary = summaries[id];
784+
for dep in resolve.deps(id).into_iter().flat_map(|a| a) {
785+
let is_transitive = summary.dependencies().iter().any(|d| {
786+
d.matches_id(dep) && d.is_transitive()
787+
});
788+
let mut empty = HashSet::new();
789+
let visited = if is_transitive {&mut *visited} else {&mut empty};
790+
try!(visit(resolve, dep, summaries, visited, checked));
791+
}
792+
}
793+
794+
// Ok, we're done, no longer visiting our node any more
795+
visited.remove(id);
796+
Ok(())
797+
}
798+
}

0 commit comments

Comments
 (0)