Skip to content

metadata-only resolve with pip download --dry-run --report! #10748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/html/cli/pip_download.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ them. Generic dependencies (e.g. universal wheels, or dependencies with no
platform, abi, or implementation constraints) will still match an over-
constrained download requirement.

``pip download --report output.json`` is an experimental feature which writes a :ref:`JSON report` of the
inputs and outputs of pip's internal resolution process to ``output.json``. This can be useful to
generate a lockfile, check whether transitive dependencies would introduce a conflict, or download
packages directly from download URLs without having to traverse PyPI again. The ``--dry-run`` option
can be used in conjunction with ``--report`` to just produce a JSON report without actually
downloading any packages, which is faster.


Options
Expand Down Expand Up @@ -224,3 +230,17 @@ Examples
--implementation cp ^
--abi cp36m --abi cp36 --abi abi3 --abi none ^
SomePackage

#. Use ``--report`` to write a JSON report of the inputs and outputs of pip's internal resolution process to ``pip-resolve.json``. See the documentation for :ref:`the JSON report <JSON report>`.

.. tab:: Unix/macOS

.. code-block:: shell

$ python -m pip download --dry-run --report pip-resolve.json SomePackage

.. tab:: Windows

.. code-block:: shell

C:\> py -m pip download --dry-run --report pip-resolve.json SomePackage
6 changes: 3 additions & 3 deletions docs/html/topics/dependency-resolution.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ will avoid performing dependency resolution during deployment.

## Dealing with dependency conflicts

This section provides practical suggestions to pip users who encounter
a `ResolutionImpossible` error, where pip cannot install their specified
packages due to conflicting dependencies.
This section provides practical suggestions to pip users who encounter a `ResolutionImpossible`
error, where pip cannot install their specified packages due to conflicting dependencies. Note that
the {ref}`JSON report` may offer more debugging information.

### Understanding your error message

Expand Down
290 changes: 290 additions & 0 deletions docs/html/user_guide.rst

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions news/10748.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add ``--dry-run`` and ``--report`` to ``pip download`` to get a JSON resolution report.
2 changes: 2 additions & 0 deletions src/pip/_internal/cli/req_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def make_resolver(
use_user_site: bool = False,
ignore_installed: bool = True,
ignore_requires_python: bool = False,
dry_run: bool = False,
force_reinstall: bool = False,
upgrade_strategy: str = "to-satisfy-only",
use_pep517: Optional[bool] = None,
Expand Down Expand Up @@ -344,6 +345,7 @@ def make_resolver(
ignore_dependencies=options.ignore_dependencies,
ignore_installed=ignore_installed,
ignore_requires_python=ignore_requires_python,
dry_run=dry_run,
force_reinstall=force_reinstall,
upgrade_strategy=upgrade_strategy,
py_version_info=py_version_info,
Expand Down
65 changes: 57 additions & 8 deletions src/pip/_internal/commands/download.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import os
from optparse import Values
Expand All @@ -7,7 +8,10 @@
from pip._internal.cli.cmdoptions import make_target_python
from pip._internal.cli.req_command import RequirementCommand, with_cleanup
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.exceptions import CommandError
from pip._internal.operations.build.build_tracker import get_build_tracker
from pip._internal.resolution.base import RequirementSetWithCandidates
from pip._internal.resolution.resolvelib.reporter import ResolutionResult
from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
from pip._internal.utils.temp_dir import TempDirectory

Expand Down Expand Up @@ -62,6 +66,28 @@ def add_options(self) -> None:
help="Download packages into <dir>.",
)

self.cmd_opts.add_option(
"--dry-run",
dest="dry_run",
action="store_true",
help=(
"Avoid actually downloading wheels or sdists. "
"Intended to be used with --report."
),
)

self.cmd_opts.add_option(
"--report",
"--resolution-report",
dest="json_report_file",
metavar="file",
default=None,
help=(
"Print a JSON object representing the resolve into <file>. "
"Often used with --dry-run."
),
)

cmdoptions.add_target_python_options(self.cmd_opts)

index_opts = cmdoptions.make_option_group(
Expand Down Expand Up @@ -122,19 +148,42 @@ def run(self, options: Values, args: List[str]) -> int:
options=options,
ignore_requires_python=options.ignore_requires_python,
py_version_info=options.python_version,
dry_run=options.dry_run,
)

self.trace_basic_info(finder)

requirement_set = resolver.resolve(reqs, check_supported_wheels=True)

downloaded: List[str] = []
for req in requirement_set.requirements.values():
if req.satisfied_by is None:
assert req.name is not None
preparer.save_linked_requirement(req)
downloaded.append(req.name)
if downloaded:
write_output("Successfully downloaded %s", " ".join(downloaded))
if not options.dry_run:
downloaded: List[str] = []
for req in requirement_set.requirements.values():
if req.satisfied_by is None:
assert req.name is not None
preparer.save_linked_requirement(req)
downloaded.append(req.name)
if downloaded:
write_output("Successfully downloaded %s", " ".join(downloaded))

# The rest of this method pertains to generating the ResolutionReport with
# --report.
if not options.json_report_file:
return SUCCESS
if not isinstance(requirement_set, RequirementSetWithCandidates):
raise CommandError(
"The legacy resolver is being used via "
"--use-deprecated=legacy-resolver. "
"The legacy resolver does not retain detailed dependency information, "
"so `pip download --report` cannot be used with it."
)

resolution_result = ResolutionResult.generate_resolve_report(
reqs, requirement_set
)

# Write the full report data to the JSON output file.
with open(options.json_report_file, "w") as f:
json.dump(resolution_result.to_dict(), f, indent=4)
write_output(f"JSON report written to '{options.json_report_file}'.")

return SUCCESS
103 changes: 4 additions & 99 deletions src/pip/_internal/index/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
import itertools
import logging
import os
import re
import urllib.parse
import urllib.request
import xml.etree.ElementTree
from html.parser import HTMLParser
from optparse import Values
from typing import (
Expand All @@ -33,12 +31,12 @@
from pip._vendor.requests.exceptions import RetryError, SSLError

from pip._internal.exceptions import NetworkConnectionError
from pip._internal.models.link import Link
from pip._internal.models.link import HTMLElement, Link
from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
from pip._internal.utils.filetypes import is_archive_file
from pip._internal.utils.misc import pairwise, redact_auth_from_url
from pip._internal.utils.misc import redact_auth_from_url
from pip._internal.vcs import vcs

from .sources import CandidatesFromPage, LinkSource, build_source
Expand All @@ -50,7 +48,6 @@

logger = logging.getLogger(__name__)

HTMLElement = xml.etree.ElementTree.Element
ResponseHeaders = MutableMapping[str, str]


Expand Down Expand Up @@ -182,94 +179,6 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str:
return page_url


def _clean_url_path_part(part: str) -> str:
    """
    Normalize the percent-encoding of one piece of a URL path.

    A "part" is a piece of the path that contains none of the reserved
    separators the path was split on.
    """
    # Decode first, then encode: a part that is already escaped (e.g. "%20")
    # must come out unchanged instead of being escaped a second time.
    decoded = urllib.parse.unquote(part)
    return urllib.parse.quote(decoded)


def _clean_file_url_path(part: str) -> str:
    """
    Normalize the quoting of a URL path piece that names a local file.

    This is meant for the first piece of a local filesystem path, i.e. the
    piece before any reserved separator.
    """
    # Round-trip through the platform's own path form so that nothing ends up
    # double quoted. On Windows the piece may carry a drive letter whose colon
    # must stay unquoted, while on Linux (no drive letters) the colon should
    # be quoted; urllib.request is trusted to make that distinction.
    filesystem_path = urllib.request.url2pathname(part)
    return urllib.request.pathname2url(filesystem_path)


# Separators that are kept out of the cleaning step: a literal "@" and a
# percent-encoded "/" ("%2F", matched case-insensitively). The capturing group
# makes re.split() return the matched separators along with the pieces.
_reserved_chars_re = re.compile("(@|%2F)", flags=re.IGNORECASE)


def _clean_url_path(path: str, is_local_path: bool) -> str:
    """
    Return ``path`` with consistent percent-encoding.

    The path is split on the reserved separators matched by
    ``_reserved_chars_re``; every piece between separators is cleaned and the
    separators themselves are re-inserted in upper case.

    :param path: The path portion of a URL.
    :param is_local_path: Whether the pieces are cleaned as local filesystem
        path pieces rather than as generic URL path pieces.
    """
    clean_func = _clean_file_url_path if is_local_path else _clean_url_path_part

    # Splitting on the reserved characters before cleaning keeps revision
    # strings in VCS URLs intact. The capturing group in the pattern means the
    # result alternates piece, separator, piece, ..., piece.
    pieces = _reserved_chars_re.split(path)

    rebuilt = []
    # The trailing "" supplies a blank separator for the last piece.
    # NOTE(review): assumes pairwise() yields (piece, separator) pairs -- confirm
    # against pip._internal.utils.misc.
    for segment, separator in pairwise(itertools.chain(pieces, [""])):
        rebuilt.append(clean_func(segment))
        # Upper-casing normalizes %xx escapes (e.g. %2f -> %2F).
        rebuilt.append(separator.upper())

    return "".join(rebuilt)


def _clean_link(url: str) -> str:
    """
    Return ``url`` with its path component fully quoted.

    For example, a ' ' in the URL becomes "%20", while characters that are
    already quoted are not quoted a second time. Only the path is rewritten;
    the other components are passed through unchanged.
    """
    # urlparse() breaks the URL down following the general structure
    # `scheme://netloc/path;parameters?query#fragment`.
    parsed = urllib.parse.urlparse(url)
    # A URL without a netloc is taken to refer to a local filesystem path.
    cleaned_path = _clean_url_path(parsed.path, is_local_path=not parsed.netloc)
    return urllib.parse.urlunparse(parsed._replace(path=cleaned_path))


def _create_link_from_element(
    element_attribs: Dict[str, Optional[str]],
    page_url: str,
    base_url: str,
) -> Optional[Link]:
    """
    Build a Link from the attributes of an anchor element on a simple
    repository page.

    :param element_attribs: The anchor's attributes. ``href``,
        ``data-requires-python`` and ``data-yanked`` are the ones read.
    :param page_url: URL of the page the anchor came from; recorded on the
        Link as ``comes_from``.
    :param base_url: URL against which a relative ``href`` is resolved.
    :return: The Link, or None when the anchor has no (or an empty) ``href``.
    """
    href = element_attribs.get("href")
    if href:
        absolute_url = urllib.parse.urljoin(base_url, href)
        return Link(
            _clean_link(absolute_url),
            comes_from=page_url,
            requires_python=element_attribs.get("data-requires-python"),
            yanked_reason=element_attribs.get("data-yanked"),
        )

    # Nothing to link to.
    return None


class CacheablePageContent:
def __init__(self, page: "HTMLPage") -> None:
assert page.cache_link_parsing
Expand Down Expand Up @@ -326,7 +235,7 @@ def _parse_links_html5lib(page: "HTMLPage") -> Iterable[Link]:
url = page.url
base_url = _determine_base_url(document, url)
for anchor in document.findall(".//a"):
link = _create_link_from_element(
link = Link.from_element(
anchor.attrib,
page_url=url,
base_url=base_url,
Expand All @@ -353,11 +262,7 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Lin
url = page.url
base_url = parser.base_url or url
for anchor in parser.anchors:
link = _create_link_from_element(
anchor,
page_url=url,
base_url=base_url,
)
link = Link.from_element(anchor, page_url=url, base_url=base_url)
if link is None:
continue
yield link
Expand Down
3 changes: 3 additions & 0 deletions src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ def __repr__(self) -> str:
def __str__(self) -> str:
    """Return the raw name and the version, separated by a single space."""
    return "{} {}".format(self.raw_name, self.version)

def as_serializable_requirement(self) -> Requirement:
    """Return this distribution as a Requirement.

    This implementation always raises; it is a placeholder only.

    :raises NotImplementedError: Always.
    """
    raise NotImplementedError

@property
def location(self) -> Optional[str]:
"""Where the distribution is loaded from.
Expand Down
3 changes: 3 additions & 0 deletions src/pip/_internal/metadata/pkg_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
)
return cls(dist)

def as_serializable_requirement(self) -> Requirement:
    """Return the result of ``as_requirement()`` on the wrapped ``self._dist``."""
    requirement = self._dist.as_requirement()
    return requirement

@property
def location(self) -> Optional[str]:
    """Return the ``location`` attribute of the wrapped ``self._dist``."""
    wrapped = self._dist
    return wrapped.location
Expand Down
Loading