diff --git a/news/10557.bugfix.rst b/news/10557.bugfix.rst new file mode 100644 index 00000000000..5a6a4ef26a9 --- /dev/null +++ b/news/10557.bugfix.rst @@ -0,0 +1 @@ +Optimize installation order calculation to improve performance when installing requirements that form a complex dependency graph with a large number of edges. diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index 12f96702024..8ee36d377d8 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -171,12 +171,17 @@ def get_installation_order( get installed one-by-one. The current implementation creates a topological ordering of the - dependency graph, while breaking any cycles in the graph at arbitrary - points. We make no guarantees about where the cycle would be broken, - other than they would be broken. + dependency graph, giving more weight to packages with fewer + or no dependencies, while breaking any cycles in the graph at + arbitrary points. We make no guarantees about where the cycle + would be broken, other than it *would* be broken. """ assert self._result is not None, "must call resolve() first" + if not req_set.requirements: + # Nothing is left to install, so we do not need an order. + return [] + graph = self._result.graph weights = get_topological_weights( graph, @@ -199,13 +204,19 @@ def get_topological_weights( This implementation may change at any point in the future without prior notice. - We take the length for the longest path to any node from root, ignoring any - paths that contain a single node twice (i.e. cycles). This is done through - a depth-first search through the graph, while keeping track of the path to - the node. + We first simplify the dependency graph by pruning any leaves and giving them + the highest weight: a package without any dependencies should be installed + first.
This is done again and again in the same way, giving ever less weight + to the newly found leaves. The loop stops when no leaves are left: all + remaining packages have at least one dependency left in the graph. + + Then we continue with the remaining graph, by taking the length for the + longest path to any node from root, ignoring any paths that contain a single + node twice (i.e. cycles). This is done through a depth-first search through + the graph, while keeping track of the path to the node. Cycles in the graph result would result in node being revisited while also - being it's own path. In this case, take no action. This helps ensure we + being on its own path. In this case, take no action. This helps ensure we don't get stuck in a cycle. When assigning weight, the longer path (i.e. larger length) is preferred. @@ -227,6 +238,34 @@ def visit(node: Optional[str]) -> None: last_known_parent_count = weights.get(node, 0) weights[node] = max(last_known_parent_count, len(path)) + # Simplify the graph, pruning leaves that have no dependencies. + # This is needed for large graphs (say over 200 packages) because the + # `visit` function is exponentially slower then, taking minutes. + # See https://github.com/pypa/pip/issues/10557 + # We will loop until we explicitly break the loop. + while True: + leaves = set() + for key in graph: + if key is None: + continue + for _child in graph.iter_children(key): + # This means we have at least one child + break + else: + # No child. + leaves.add(key) + if not leaves: + # We are done simplifying. + break + # Calculate the weight for the leaves. + weight = len(graph) - 1 + for leaf in leaves: + weights[leaf] = weight + # Remove the leaves from the graph, making it simpler. + for leaf in leaves: + graph.remove(leaf) + + # Visit the remaining graph. # `None` is guaranteed to be the root node by resolvelib. 
visit(None) diff --git a/tests/unit/resolution_resolvelib/test_resolver.py b/tests/unit/resolution_resolvelib/test_resolver.py index 1fcde34a41e..579195b55ea 100644 --- a/tests/unit/resolution_resolvelib/test_resolver.py +++ b/tests/unit/resolution_resolvelib/test_resolver.py @@ -115,7 +115,7 @@ def test_new_resolver_get_installation_order( ("three", "four"), ("four", "five"), ], - {None: 0, "one": 1, "two": 1, "three": 2, "four": 3, "five": 4}, + {None: 0, "five": 5, "four": 4, "one": 4, "three": 2, "two": 1}, ), ( "linear",