diff --git a/docs/html/cli/pip_download.rst b/docs/html/cli/pip_download.rst
index 4f15314d765..f7deded340f 100644
--- a/docs/html/cli/pip_download.rst
+++ b/docs/html/cli/pip_download.rst
@@ -45,6 +45,12 @@ them.
 Generic dependencies (e.g. universal wheels, or dependencies with no
 platform, abi, or implementation constraints) will still match an over-
 constrained download requirement.
+``pip download --report output.json`` is an experimental feature which writes a :ref:`JSON report` of the
+inputs and outputs of pip's internal resolution process to ``output.json``. This can be useful to
+generate a lockfile, to check whether transitive dependencies would introduce a conflict, or to
+download packages directly from their resolved URLs without having to traverse PyPI again. The
+``--dry-run`` option can be combined with ``--report`` to produce just the JSON report, without
+actually downloading any packages, which is faster.
 
 
 Options
@@ -224,3 +230,17 @@ Examples
       --implementation cp ^
       --abi cp36m --abi cp36 --abi abi3 --abi none ^
       SomePackage
+
+#. Use ``--report`` to generate a JSON report of the inputs and outputs of pip's internal resolution process, written here to ``pip-resolve.json``. See the documentation for :ref:`the JSON report <JSON report>`.
+
+   .. tab:: Unix/macOS
+
+      .. code-block:: shell
+
+         $ python -m pip download --dry-run --report pip-resolve.json SomePackage
+
+   .. tab:: Windows
+
+      .. code-block:: shell
+
+         C:> py -m pip download --dry-run --report pip-resolve.json SomePackage
diff --git a/docs/html/topics/dependency-resolution.md b/docs/html/topics/dependency-resolution.md
index 7dd9848b021..52fbf891f61 100644
--- a/docs/html/topics/dependency-resolution.md
+++ b/docs/html/topics/dependency-resolution.md
@@ -163,9 +163,9 @@ will avoid performing dependency resolution during deployment.
 
 ## Dealing with dependency conflicts
 
-This section provides practical suggestions to pip users who encounter
-a `ResolutionImpossible` error, where pip cannot install their specified
-packages due to conflicting dependencies.
+This section provides practical suggestions to pip users who encounter a `ResolutionImpossible`
+error, where pip cannot install their specified packages due to conflicting dependencies. Note that
+the {ref}`JSON report` may offer more debugging information.
 
 ### Understanding your error message
diff --git a/docs/html/user_guide.rst b/docs/html/user_guide.rst
index 6a25a6e6ae3..5b21d066af4 100644
--- a/docs/html/user_guide.rst
+++ b/docs/html/user_guide.rst
@@ -865,6 +865,296 @@ of ability. Some examples that you could consider include:
 * ``distlib`` - Packaging and distribution utilities (including
   functions for interacting with PyPI).
 
+Pip now supports an experimental feature that dumps the output of its resolve process into a :ref:`JSON report`, which can then be processed using the ``packaging`` library.
+
+.. _`JSON report`:
+
+JSON report
+===========
+
+Pip exposes an experimental feature that writes a JSON report of the dependency resolution process's inputs and outputs via ``pip download --dry-run --report pip-output.json``. This report is intended to be consumed as part of automated pip execution pipelines, but can also be used as a debugging tool.
+
+Example command execution:
+
+.. tab:: Unix/macOS
+
+   ..
code-block:: console + + $ pip download --dry-run --report pip-output.json tensorboard + Collecting tensorboard + Obtaining dependency information from tensorboard 2.7.0 + Collecting tensorboard-plugin-wit>=1.6.0 + Obtaining dependency information from tensorboard-plugin-wit 1.8.1 + Collecting google-auth-oauthlib<0.5,>=0.4.1 + Obtaining dependency information from google-auth-oauthlib 0.4.6 + Collecting absl-py>=0.4 + Obtaining dependency information from absl-py 1.0.0 + Collecting protobuf>=3.6.0 + Obtaining dependency information from protobuf 3.19.1 + Collecting setuptools>=41.0.0 + Obtaining dependency information from setuptools 60.3.1 + Collecting wheel>=0.26 + Obtaining dependency information from wheel 0.37.1 + Collecting werkzeug>=0.11.15 + Obtaining dependency information from werkzeug 2.0.2 + Collecting tensorboard-data-server<0.7.0,>=0.6.0 + Obtaining dependency information from tensorboard-data-server 0.6.1 + Collecting markdown>=2.6.8 + Obtaining dependency information from markdown 3.3.6 + Collecting grpcio>=1.24.3 + Using cached grpcio-1.43.0.tar.gz (21.5 MB) + Preparing metadata (setup.py) ... done + Collecting numpy>=1.12.0 + Using cached numpy-1.22.0.zip (11.3 MB) + Installing build dependencies ... done + Getting requirements to build wheel ... done + Preparing metadata (pyproject.toml) ... done + Collecting requests<3,>=2.21.0 + Obtaining dependency information from requests 2.27.1 + Collecting google-auth<3,>=1.6.3 + Obtaining dependency information from google-auth 2.3.3 + Collecting six + Obtaining dependency information from six 1.16.0 + Collecting pyasn1-modules>=0.2.1 + Obtaining dependency information from pyasn1-modules 0.2.8 + Collecting rsa<5,>=3.1.4 + Obtaining dependency information from rsa 4.8 + Collecting cachetools<5.0,>=2.0.0 + Obtaining dependency information from cachetools 4.2.4 + Collecting requests-oauthlib>=0.7.0 + Obtaining dependency information from requests-oauthlib 1.3.0 + Collecting charset-normalizer~=2.0.0 + Obtaining dependency information from charset-normalizer 2.0.10 + Collecting certifi>=2017.4.17 + Obtaining dependency information from certifi 2021.10.8 + Collecting idna<4,>=2.5 + Obtaining dependency information from idna 3.3 + Collecting urllib3<1.27,>=1.21.1 + Obtaining dependency information from urllib3 1.26.7 + Collecting pyasn1<0.5.0,>=0.4.6 + Obtaining dependency information from pyasn1 0.4.8 + Collecting oauthlib>=3.0.0 + Obtaining dependency information from oauthlib 3.1.1 + Python version: '==3.10.1' + Input requirements: 'tensorboard' + Resolution: 'tensorboard==2.7.0' 'absl-py==1.0.0' 'google-auth==2.3.3' 'google-auth-oauthlib==0.4.6' 'grpcio==1.43.0' 'markdown==3.3.6' 'numpy==1.22.0' 'protobuf==3.19.1' 'requests==2.27.1' 'tensorboard-data-server==0.6.1' 'tensorboard-plugin-wit==1.8.1' 'werkzeug==2.0.2' 'wheel==0.37.1' 'cachetools==4.2.4' 'certifi==2021.10.8' 'charset-normalizer==2.0.10' 'idna==3.3' 'pyasn1-modules==0.2.8' 'requests-oauthlib==1.3.0' 'rsa==4.8' 'six==1.16.0' 'urllib3==1.26.7' 'oauthlib==3.1.1' 'pyasn1==0.4.8' 'setuptools==60.3.1' + JSON report written to 'pip-output.json'. + +.. tab:: Windows + + .. 
code-block:: console + + C:\> pip download --dry-run --report pip-output.json tensorboard + Collecting tensorboard + Obtaining dependency information from tensorboard 2.7.0 + Collecting tensorboard-plugin-wit>=1.6.0 + Obtaining dependency information from tensorboard-plugin-wit 1.8.1 + Collecting google-auth-oauthlib<0.5,>=0.4.1 + Obtaining dependency information from google-auth-oauthlib 0.4.6 + Collecting absl-py>=0.4 + Obtaining dependency information from absl-py 1.0.0 + Collecting protobuf>=3.6.0 + Obtaining dependency information from protobuf 3.19.1 + Collecting setuptools>=41.0.0 + Obtaining dependency information from setuptools 60.3.1 + Collecting wheel>=0.26 + Obtaining dependency information from wheel 0.37.1 + Collecting werkzeug>=0.11.15 + Obtaining dependency information from werkzeug 2.0.2 + Collecting tensorboard-data-server<0.7.0,>=0.6.0 + Obtaining dependency information from tensorboard-data-server 0.6.1 + Collecting markdown>=2.6.8 + Obtaining dependency information from markdown 3.3.6 + Collecting grpcio>=1.24.3 + Using cached grpcio-1.43.0.tar.gz (21.5 MB) + Preparing metadata (setup.py) ... done + Collecting numpy>=1.12.0 + Using cached numpy-1.22.0.zip (11.3 MB) + Installing build dependencies ... done + Getting requirements to build wheel ... done + Preparing metadata (pyproject.toml) ... done + Collecting requests<3,>=2.21.0 + Obtaining dependency information from requests 2.27.1 + Collecting google-auth<3,>=1.6.3 + Obtaining dependency information from google-auth 2.3.3 + Collecting six + Obtaining dependency information from six 1.16.0 + Collecting pyasn1-modules>=0.2.1 + Obtaining dependency information from pyasn1-modules 0.2.8 + Collecting rsa<5,>=3.1.4 + Obtaining dependency information from rsa 4.8 + Collecting cachetools<5.0,>=2.0.0 + Obtaining dependency information from cachetools 4.2.4 + Collecting requests-oauthlib>=0.7.0 + Obtaining dependency information from requests-oauthlib 1.3.0 + Collecting charset-normalizer~=2.0.0 + Obtaining dependency information from charset-normalizer 2.0.10 + Collecting certifi>=2017.4.17 + Obtaining dependency information from certifi 2021.10.8 + Collecting idna<4,>=2.5 + Obtaining dependency information from idna 3.3 + Collecting urllib3<1.27,>=1.21.1 + Obtaining dependency information from urllib3 1.26.7 + Collecting pyasn1<0.5.0,>=0.4.6 + Obtaining dependency information from pyasn1 0.4.8 + Collecting oauthlib>=3.0.0 + Obtaining dependency information from oauthlib 3.1.1 + Python version: '==3.10.1' + Input requirements: 'tensorboard' + Resolution: 'tensorboard==2.7.0' 'absl-py==1.0.0' 'google-auth==2.3.3' 'google-auth-oauthlib==0.4.6' 'grpcio==1.43.0' 'markdown==3.3.6' 'numpy==1.22.0' 'protobuf==3.19.1' 'requests==2.27.1' 'tensorboard-data-server==0.6.1' 'tensorboard-plugin-wit==1.8.1' 'werkzeug==2.0.2' 'wheel==0.37.1' 'cachetools==4.2.4' 'certifi==2021.10.8' 'charset-normalizer==2.0.10' 'idna==3.3' 'pyasn1-modules==0.2.8' 'requests-oauthlib==1.3.0' 'rsa==4.8' 'six==1.16.0' 'urllib3==1.26.7' 'oauthlib==3.1.1' 'pyasn1==0.4.8' 'setuptools==60.3.1' + JSON report written to 'pip-output.json'. + +The contents of ``pip-output.json`` will look like: + +.. 
code-block:: + + { + "experimental": true, + "input_requirements": [ + "tensorboard" + ], + "python_version": "==3.10.1", + "candidates": { + "tensorboard": { + "requirement": "tensorboard==2.7.0", + "download_info": { + "direct_url": { + "url": "https://files.pythonhosted.org/packages/2d/eb/80f75ab480cfbd032442f06ec7c15ef88376c5ef7fd6f6bf2e0e03b47e31/tensorboard-2.7.0-py3-none-any.whl", + "archive_info": { + "hash": "sha256=239f78a4a8dff200ce585a030c787773a8c1184d5c159252f5f85bac4e3c3b38" + } + }, + "dist_info_metadata": null + }, + "dependencies": { + "tensorboard-plugin-wit": "tensorboard-plugin-wit>=1.6.0", + "google-auth-oauthlib": "google-auth-oauthlib<0.5,>=0.4.1", + "absl-py": "absl-py>=0.4", + "protobuf": "protobuf>=3.6.0", + "setuptools": "setuptools>=41.0.0", + "wheel": "wheel>=0.26", + "werkzeug": "werkzeug>=0.11.15", + "tensorboard-data-server": "tensorboard-data-server<0.7.0,>=0.6.0", + "markdown": "markdown>=2.6.8", + "grpcio": "grpcio>=1.24.3", + "numpy": "numpy>=1.12.0", + "requests": "requests<3,>=2.21.0", + "google-auth": "google-auth<3,>=1.6.3" + }, + "requires_python": ">=3.6" + }, + "absl-py": { + "requirement": "absl-py==1.0.0", + "download_info": { + "direct_url": { + "url": "https://files.pythonhosted.org/packages/2c/03/e3e19d3faf430ede32e41221b294e37952e06acc96781c417ac25d4a0324/absl_py-1.0.0-py3-none-any.whl", + "archive_info": { + "hash": "sha256=84e6dcdc69c947d0c13e5457d056bd43cade4c2393dce00d684aedea77ddc2a3" + } + }, + "dist_info_metadata": null + }, + "dependencies": { + "six": "six" + }, + "requires_python": ">=3.6" + }, + (...truncated) + +The output can be processed with `jq `_ to produce e.g. a requirements file that pins the hashes of each dependency which provides such a hash: + +.. tab:: Unix/macOS + + .. code-block:: console + + $ jq -r <./pip-output.json '.candidates[] | {req: .requirement, hash: .download_info.direct_url.archive_info.hash} | .req + ((.hash | " --hash " + sub("="; ":")) // "")' + tensorboard==2.7.0 --hash sha256:239f78a4a8dff200ce585a030c787773a8c1184d5c159252f5f85bac4e3c3b38 + absl-py==1.0.0 --hash sha256:84e6dcdc69c947d0c13e5457d056bd43cade4c2393dce00d684aedea77ddc2a3 + google-auth==2.3.3 --hash sha256:a348a50b027679cb7dae98043ac8dbcc1d7951f06d8387496071a1e05a2465c0 + google-auth-oauthlib==0.4.6 --hash sha256:3f2a6e802eebbb6fb736a370fbf3b055edcb6b52878bf2f26330b5e041316c73 + grpcio==1.43.0 --hash sha256:735d9a437c262ab039d02defddcb9f8f545d7009ae61c0114e19dda3843febe5 + markdown==3.3.6 --hash sha256:9923332318f843411e9932237530df53162e29dc7a4e2b91e35764583c46c9a3 + numpy==1.22.0 --hash sha256:a955e4128ac36797aaffd49ab44ec74a71c11d6938df83b1285492d277db5397 + protobuf==3.19.1 --hash sha256:e813b1c9006b6399308e917ac5d298f345d95bb31f46f02b60cd92970a9afa17 + requests==2.27.1 --hash sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d + tensorboard-data-server==0.6.1 --hash sha256:809fe9887682d35c1f7d1f54f0f40f98bb1f771b14265b453ca051e2ce58fca7 + tensorboard-plugin-wit==1.8.1 --hash sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe + werkzeug==2.0.2 --hash sha256:63d3dc1cf60e7b7e35e97fa9861f7397283b75d765afcaefd993d6046899de8f + wheel==0.37.1 --hash sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a + cachetools==4.2.4 --hash sha256:92971d3cb7d2a97efff7c7bb1657f21a8f5fb309a37530537c71b1774189f2d1 + certifi==2021.10.8 --hash sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569 + charset-normalizer==2.0.10 --hash 
sha256:cb957888737fc0bbcd78e3df769addb41fd1ff8cf950dc9e7ad7793f1bf44455 + idna==3.3 --hash sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff + pyasn1-modules==0.2.8 --hash sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74 + requests-oauthlib==1.3.0 --hash sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d + rsa==4.8 --hash sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb + six==1.16.0 --hash sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + urllib3==1.26.7 --hash sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844 + oauthlib==3.1.1 --hash sha256:42bf6354c2ed8c6acb54d971fce6f88193d97297e18602a3a886603f9d7730cc + pyasn1==0.4.8 --hash sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d + setuptools==60.3.1 --hash sha256:2932bfeb248c648dc411ea9714d5a6de7a33ef1a0db2f0fce644d8172b0479e8 + +.. tab:: Windows + + .. code-block:: console + + C:\> jq -r <./pip-output.json '.candidates[] | {req: .requirement, hash: .download_info.direct_url.archive_info.hash} | .req + ((.hash | " --hash " + sub("="; ":")) // "")' + tensorboard==2.7.0 --hash sha256:239f78a4a8dff200ce585a030c787773a8c1184d5c159252f5f85bac4e3c3b38 + absl-py==1.0.0 --hash sha256:84e6dcdc69c947d0c13e5457d056bd43cade4c2393dce00d684aedea77ddc2a3 + google-auth==2.3.3 --hash sha256:a348a50b027679cb7dae98043ac8dbcc1d7951f06d8387496071a1e05a2465c0 + google-auth-oauthlib==0.4.6 --hash sha256:3f2a6e802eebbb6fb736a370fbf3b055edcb6b52878bf2f26330b5e041316c73 + grpcio==1.43.0 --hash sha256:735d9a437c262ab039d02defddcb9f8f545d7009ae61c0114e19dda3843febe5 + markdown==3.3.6 --hash sha256:9923332318f843411e9932237530df53162e29dc7a4e2b91e35764583c46c9a3 + numpy==1.22.0 --hash sha256:a955e4128ac36797aaffd49ab44ec74a71c11d6938df83b1285492d277db5397 + protobuf==3.19.1 --hash sha256:e813b1c9006b6399308e917ac5d298f345d95bb31f46f02b60cd92970a9afa17 + requests==2.27.1 --hash sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d + tensorboard-data-server==0.6.1 --hash sha256:809fe9887682d35c1f7d1f54f0f40f98bb1f771b14265b453ca051e2ce58fca7 + tensorboard-plugin-wit==1.8.1 --hash sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe + werkzeug==2.0.2 --hash sha256:63d3dc1cf60e7b7e35e97fa9861f7397283b75d765afcaefd993d6046899de8f + wheel==0.37.1 --hash sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a + cachetools==4.2.4 --hash sha256:92971d3cb7d2a97efff7c7bb1657f21a8f5fb309a37530537c71b1774189f2d1 + certifi==2021.10.8 --hash sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569 + charset-normalizer==2.0.10 --hash sha256:cb957888737fc0bbcd78e3df769addb41fd1ff8cf950dc9e7ad7793f1bf44455 + idna==3.3 --hash sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff + pyasn1-modules==0.2.8 --hash sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74 + requests-oauthlib==1.3.0 --hash sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d + rsa==4.8 --hash sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb + six==1.16.0 --hash sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + urllib3==1.26.7 --hash sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844 + oauthlib==3.1.1 --hash sha256:42bf6354c2ed8c6acb54d971fce6f88193d97297e18602a3a886603f9d7730cc + pyasn1==0.4.8 --hash sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d + 
setuptools==60.3.1 --hash sha256:2932bfeb248c648dc411ea9714d5a6de7a33ef1a0db2f0fce644d8172b0479e8
+
+JSON schema
+-----------
+
+The JSON report is described below, with each field annotated with the mypy-style type it has after reading the report back with `json.load() <https://docs.python.org/3/library/json.html#json.load>`_. The report contains multiple top-level fields:
+
+* ``experimental: bool``: set to ``True`` as the format is not yet stable.
+* ``input_requirements: List[str]``: strings describing the requirements provided to the pip resolver. Can be parsed with `packaging.requirements.Requirement <https://packaging.pypa.io/en/latest/requirements.html>`_.
+* ``python_version: str``: a string describing the Python interpreter version the resolve was performed for. Can be parsed with `packaging.specifiers.SpecifierSet <https://packaging.pypa.io/en/latest/specifiers.html>`_. Currently always an exact ``==`` constraint.
+* ``candidates: Dict[str, Dict]``: each package that would have been downloaded with ``pip download`` is represented by an entry in the ``candidates`` dict. Each key is the name of a package, and each package name is satisfied by exactly one candidate in the final resolve.
+
+Candidates
+----------
+
+Each value in the ``candidates`` dict has the following fields:
+
+* ``requirement: str``: an ``==`` requirement string pinning the exact version of the candidate that would have been fetched by ``pip download``. Can be parsed with `packaging.requirements.Requirement <https://packaging.pypa.io/en/latest/requirements.html>`_.
+* ``requires_python: Optional[str]``: a constraint on the executing Python interpreter version exerted by this candidate. Can be parsed with `packaging.specifiers.SpecifierSet <https://packaging.pypa.io/en/latest/specifiers.html>`_.
+* ``dependencies: Dict[str, str]``: all the dependencies required by this candidate, as a mapping from each dependency's package name to a requirement string which can be parsed with `packaging.requirements.Requirement <https://packaging.pypa.io/en/latest/requirements.html>`_. Each such requirement is satisfied by another member of the overall ``candidates`` dict.
+* ``download_info: Dict``: the location this candidate can be downloaded from, along with any metadata about it. See below.
+
+Download Info
+-------------
+
+The ``download_info`` object has the following fields:
+
+* ``direct_url: Dict``: a JSON-serialized Direct URL as per :pep:`610` for this package's location, which may be a remote URL or a local directory.
+* ``dist_info_metadata: Optional[Dict]``: a JSON-serialized Direct URL as per :pep:`610` for this package's *metadata*, which may be served for individual package downloads by a package index (given with ``-i``) that implements :pep:`658`.
+
+  If this field's value is non-``None``, it will only ever contain the ``archive_info`` key of the Direct URL JSON schema from :pep:`610`, and that key's ``hash`` field may be empty if :pep:`658`'s ``data-dist-info-metadata="true"`` was provided in the anchor tag that was parsed to obtain this candidate's download info, instead of a specific checksum value for the candidate's metadata such as ``data-dist-info-metadata="sha256=e8413ab19..."``.
+
 .. _changes-to-the-pip-dependency-resolver-in-20-2-2020:
 
 .. _`Resolver changes 2020`:
diff --git a/news/10748.feature.rst b/news/10748.feature.rst
new file mode 100644
index 00000000000..20578ed6735
--- /dev/null
+++ b/news/10748.feature.rst
@@ -0,0 +1 @@
+Add ``--dry-run`` and ``--report`` options to ``pip download`` for generating a JSON resolution report.
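
To make the schema concrete, the following is a minimal sketch of a downstream consumer of the
report. It relies only on the fields documented above plus the ``packaging`` library; the
``summarize_report`` helper and the ``pip-output.json`` file name are illustrative, not part of
pip itself:

.. code-block:: python

   import json

   from packaging.requirements import Requirement
   from packaging.specifiers import SpecifierSet


   def summarize_report(path: str) -> None:
       """Print each pinned candidate in a --report file along with its origin."""
       with open(path) as f:
           report = json.load(f)

       # The interpreter version the resolve was performed for; the schema
       # above guarantees an exact "==" constraint.
       python_version = SpecifierSet(report["python_version"])
       print(f"resolved for python_version {python_version}")

       for name, candidate in report["candidates"].items():
           # An exact "==" pin for the candidate satisfying this package name.
           pin = Requirement(candidate["requirement"])
           # "dependencies" maps dependency package names to requirement
           # strings, each satisfied by another entry in "candidates".
           deps = [Requirement(r) for r in candidate["dependencies"].values()]
           # The PEP 610 direct URL the candidate would be fetched from.
           url = candidate["download_info"]["direct_url"]["url"]
           print(f"{name}: {pin} <- {url} ({len(deps)} dependencies)")


   summarize_report("pip-output.json")

Because each ``requirement`` entry is an exact ``==`` pin, reports from successive runs can be
diffed to spot version changes before anything is downloaded.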
diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py
index aab177002d4..74a1514055a 100644
--- a/src/pip/_internal/cli/req_command.py
+++ b/src/pip/_internal/cli/req_command.py
@@ -313,6 +313,7 @@ def make_resolver(
         use_user_site: bool = False,
         ignore_installed: bool = True,
         ignore_requires_python: bool = False,
+        dry_run: bool = False,
         force_reinstall: bool = False,
         upgrade_strategy: str = "to-satisfy-only",
         use_pep517: Optional[bool] = None,
@@ -344,6 +345,7 @@ def make_resolver(
             ignore_dependencies=options.ignore_dependencies,
             ignore_installed=ignore_installed,
             ignore_requires_python=ignore_requires_python,
+            dry_run=dry_run,
             force_reinstall=force_reinstall,
             upgrade_strategy=upgrade_strategy,
             py_version_info=py_version_info,
diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py
index a6d7e628f2b..180e657bae9 100644
--- a/src/pip/_internal/commands/download.py
+++ b/src/pip/_internal/commands/download.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 from optparse import Values
@@ -7,7 +8,10 @@
 from pip._internal.cli.cmdoptions import make_target_python
 from pip._internal.cli.req_command import RequirementCommand, with_cleanup
 from pip._internal.cli.status_codes import SUCCESS
+from pip._internal.exceptions import CommandError
 from pip._internal.operations.build.build_tracker import get_build_tracker
+from pip._internal.resolution.base import RequirementSetWithCandidates
+from pip._internal.resolution.resolvelib.reporter import ResolutionResult
 from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
 from pip._internal.utils.temp_dir import TempDirectory
@@ -62,6 +66,28 @@ def add_options(self) -> None:
             help="Download packages into <dir>.",
         )
 
+        self.cmd_opts.add_option(
+            "--dry-run",
+            dest="dry_run",
+            action="store_true",
+            help=(
+                "Avoid actually downloading wheels or sdists. "
+                "Intended to be used with --report."
+            ),
+        )
+
+        self.cmd_opts.add_option(
+            "--report",
+            "--resolution-report",
+            dest="json_report_file",
+            metavar="file",
+            default=None,
+            help=(
+                "Write a JSON object representing the resolve into <file>. "
+                "Often used with --dry-run."
+            ),
+        )
+
         cmdoptions.add_target_python_options(self.cmd_opts)
 
         index_opts = cmdoptions.make_option_group(
@@ -122,19 +148,42 @@ def run(self, options: Values, args: List[str]) -> int:
             options=options,
             ignore_requires_python=options.ignore_requires_python,
             py_version_info=options.python_version,
+            dry_run=options.dry_run,
         )
 
         self.trace_basic_info(finder)
 
         requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
 
-        downloaded: List[str] = []
-        for req in requirement_set.requirements.values():
-            if req.satisfied_by is None:
-                assert req.name is not None
-                preparer.save_linked_requirement(req)
-                downloaded.append(req.name)
-        if downloaded:
-            write_output("Successfully downloaded %s", " ".join(downloaded))
+        if not options.dry_run:
+            downloaded: List[str] = []
+            for req in requirement_set.requirements.values():
+                if req.satisfied_by is None:
+                    assert req.name is not None
+                    preparer.save_linked_requirement(req)
+                    downloaded.append(req.name)
+            if downloaded:
+                write_output("Successfully downloaded %s", " ".join(downloaded))
+
+        # The rest of this method pertains to generating the JSON report with
+        # --report.
+        if not options.json_report_file:
+            return SUCCESS
+        if not isinstance(requirement_set, RequirementSetWithCandidates):
+            raise CommandError(
+                "The legacy resolver is being used via "
+                "--use-deprecated=legacy-resolver. "
+ "The legacy resolver does not retain detailed dependency information, " + "so `pip download --report` cannot be used with it. " + ) + + resolution_result = ResolutionResult.generate_resolve_report( + reqs, requirement_set + ) + + # Write the full report data to the JSON output file. + with open(options.json_report_file, "w") as f: + json.dump(resolution_result.to_dict(), f, indent=4) + write_output(f"JSON report written to '{options.json_report_file}'.") return SUCCESS diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index e6e9469af1a..15c9dbdef54 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -8,10 +8,8 @@ import itertools import logging import os -import re import urllib.parse import urllib.request -import xml.etree.ElementTree from html.parser import HTMLParser from optparse import Values from typing import ( @@ -33,12 +31,12 @@ from pip._vendor.requests.exceptions import RetryError, SSLError from pip._internal.exceptions import NetworkConnectionError -from pip._internal.models.link import Link +from pip._internal.models.link import HTMLElement, Link from pip._internal.models.search_scope import SearchScope from pip._internal.network.session import PipSession from pip._internal.network.utils import raise_for_status from pip._internal.utils.filetypes import is_archive_file -from pip._internal.utils.misc import pairwise, redact_auth_from_url +from pip._internal.utils.misc import redact_auth_from_url from pip._internal.vcs import vcs from .sources import CandidatesFromPage, LinkSource, build_source @@ -50,7 +48,6 @@ logger = logging.getLogger(__name__) -HTMLElement = xml.etree.ElementTree.Element ResponseHeaders = MutableMapping[str, str] @@ -182,94 +179,6 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str: return page_url -def _clean_url_path_part(part: str) -> str: - """ - Clean a "part" of a URL path (i.e. after splitting on "@" characters). - """ - # We unquote prior to quoting to make sure nothing is double quoted. - return urllib.parse.quote(urllib.parse.unquote(part)) - - -def _clean_file_url_path(part: str) -> str: - """ - Clean the first part of a URL path that corresponds to a local - filesystem path (i.e. the first part after splitting on "@" characters). - """ - # We unquote prior to quoting to make sure nothing is double quoted. - # Also, on Windows the path part might contain a drive letter which - # should not be quoted. On Linux where drive letters do not - # exist, the colon should be quoted. We rely on urllib.request - # to do the right thing here. - return urllib.request.pathname2url(urllib.request.url2pathname(part)) - - -# percent-encoded: / -_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) - - -def _clean_url_path(path: str, is_local_path: bool) -> str: - """ - Clean the path portion of a URL. - """ - if is_local_path: - clean_func = _clean_file_url_path - else: - clean_func = _clean_url_path_part - - # Split on the reserved characters prior to cleaning so that - # revision strings in VCS URLs are properly preserved. - parts = _reserved_chars_re.split(path) - - cleaned_parts = [] - for to_clean, reserved in pairwise(itertools.chain(parts, [""])): - cleaned_parts.append(clean_func(to_clean)) - # Normalize %xx escapes (e.g. %2f -> %2F) - cleaned_parts.append(reserved.upper()) - - return "".join(cleaned_parts) - - -def _clean_link(url: str) -> str: - """ - Make sure a link is fully quoted. 
- For example, if ' ' occurs in the URL, it will be replaced with "%20", - and without double-quoting other characters. - """ - # Split the URL into parts according to the general structure - # `scheme://netloc/path;parameters?query#fragment`. - result = urllib.parse.urlparse(url) - # If the netloc is empty, then the URL refers to a local filesystem path. - is_local_path = not result.netloc - path = _clean_url_path(result.path, is_local_path=is_local_path) - return urllib.parse.urlunparse(result._replace(path=path)) - - -def _create_link_from_element( - element_attribs: Dict[str, Optional[str]], - page_url: str, - base_url: str, -) -> Optional[Link]: - """ - Convert an anchor element's attributes in a simple repository page to a Link. - """ - href = element_attribs.get("href") - if not href: - return None - - url = _clean_link(urllib.parse.urljoin(base_url, href)) - pyrequire = element_attribs.get("data-requires-python") - yanked_reason = element_attribs.get("data-yanked") - - link = Link( - url, - comes_from=page_url, - requires_python=pyrequire, - yanked_reason=yanked_reason, - ) - - return link - - class CacheablePageContent: def __init__(self, page: "HTMLPage") -> None: assert page.cache_link_parsing @@ -326,7 +235,7 @@ def _parse_links_html5lib(page: "HTMLPage") -> Iterable[Link]: url = page.url base_url = _determine_base_url(document, url) for anchor in document.findall(".//a"): - link = _create_link_from_element( + link = Link.from_element( anchor.attrib, page_url=url, base_url=base_url, @@ -353,11 +262,7 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Lin url = page.url base_url = parser.base_url or url for anchor in parser.anchors: - link = _create_link_from_element( - anchor, - page_url=url, - base_url=base_url, - ) + link = Link.from_element(anchor, page_url=url, base_url=base_url) if link is None: continue yield link diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index f1a1ee62faa..1528500c48c 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -120,6 +120,9 @@ def __repr__(self) -> str: def __str__(self) -> str: return f"{self.raw_name} {self.version}" + def as_serializable_requirement(self) -> Requirement: + raise NotImplementedError() + @property def location(self) -> Optional[str]: """Where the distribution is loaded from. 
diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index ffde8c77e73..fa49ba37189 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -113,6 +113,9 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: ) return cls(dist) + def as_serializable_requirement(self) -> Requirement: + return self._dist.as_requirement() + @property def location(self) -> Optional[str]: return self._dist.location diff --git a/src/pip/_internal/models/direct_url.py b/src/pip/_internal/models/direct_url.py index e75feda9ca9..9eff12ba3e5 100644 --- a/src/pip/_internal/models/direct_url.py +++ b/src/pip/_internal/models/direct_url.py @@ -1,8 +1,10 @@ """ PEP 610 """ +import abc import json import re import urllib.parse -from typing import Any, Dict, Iterable, Optional, Type, TypeVar, Union +from dataclasses import dataclass +from typing import Any, ClassVar, Dict, Iterable, Optional, Type, TypeVar __all__ = [ "DirectUrl", @@ -47,8 +49,39 @@ def _get_required( return value -def _exactly_one_of(infos: Iterable[Optional["InfoType"]]) -> "InfoType": - infos = [info for info in infos if info is not None] +def _filter_none(**kwargs: Any) -> Dict[str, Any]: + """Make dict excluding None values.""" + return {k: v for k, v in kwargs.items() if v is not None} + + +class InfoType(metaclass=abc.ABCMeta): + """Superclass for the types of metadata that can be stored within a "direct URL".""" + + name: ClassVar[str] + + @classmethod + @abc.abstractmethod + def _from_dict(cls: Type[T], d: Optional[Dict[str, Any]]) -> Optional[T]: + """Parse an instance of this class from a JSON-serializable dict.""" + + @abc.abstractmethod + def _to_dict(self) -> Dict[str, Any]: + """Produce a JSON-serializable dict which can be parsed with `._from_dict()`.""" + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> "InfoType": + """Parse exactly one of the known subclasses from the dict `d`.""" + return _exactly_one_of( + [ + ArchiveInfo._from_dict(_get(d, dict, "archive_info")), + DirInfo._from_dict(_get(d, dict, "dir_info")), + VcsInfo._from_dict(_get(d, dict, "vcs_info")), + ] + ) + + +def _exactly_one_of(infos: Iterable[Optional[InfoType]]) -> InfoType: + infos = list(filter(None, infos)) if not infos: raise DirectUrlValidationError( "missing one of archive_info, dir_info, vcs_info" @@ -61,23 +94,15 @@ def _exactly_one_of(infos: Iterable[Optional["InfoType"]]) -> "InfoType": return infos[0] -def _filter_none(**kwargs: Any) -> Dict[str, Any]: - """Make dict excluding None values.""" - return {k: v for k, v in kwargs.items() if v is not None} - - -class VcsInfo: - name = "vcs_info" +@dataclass(frozen=True) +class VcsInfo(InfoType): + vcs: str + commit_id: str + requested_revision: Optional[str] = None + resolved_revision: Optional[str] = None + resolved_revision_type: Optional[str] = None - def __init__( - self, - vcs: str, - commit_id: str, - requested_revision: Optional[str] = None, - ) -> None: - self.vcs = vcs - self.requested_revision = requested_revision - self.commit_id = commit_id + name: ClassVar[str] = "vcs_info" @classmethod def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["VcsInfo"]: @@ -97,14 +122,11 @@ def _to_dict(self) -> Dict[str, Any]: ) -class ArchiveInfo: - name = "archive_info" +@dataclass(frozen=True) +class ArchiveInfo(InfoType): + hash: Optional[str] = None - def __init__( - self, - hash: Optional[str] = None, - ) -> None: - self.hash = hash + name: ClassVar[str] = "archive_info" 
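+    # ("name" above doubles as this variant's key in the serialized PEP 610 dict.)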
@classmethod def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["ArchiveInfo"]: @@ -116,14 +138,11 @@ def _to_dict(self) -> Dict[str, Any]: return _filter_none(hash=self.hash) -class DirInfo: - name = "dir_info" +@dataclass(frozen=True) +class DirInfo(InfoType): + editable: bool = False - def __init__( - self, - editable: bool = False, - ) -> None: - self.editable = editable + name: ClassVar[str] = "dir_info" @classmethod def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["DirInfo"]: @@ -135,19 +154,11 @@ def _to_dict(self) -> Dict[str, Any]: return _filter_none(editable=self.editable or None) -InfoType = Union[ArchiveInfo, DirInfo, VcsInfo] - - +@dataclass(frozen=True) class DirectUrl: - def __init__( - self, - url: str, - info: InfoType, - subdirectory: Optional[str] = None, - ) -> None: - self.url = url - self.info = info - self.subdirectory = subdirectory + url: str + info: InfoType + subdirectory: Optional[str] = None def _remove_auth_from_netloc(self, netloc: str) -> str: if "@" not in netloc: @@ -184,13 +195,7 @@ def from_dict(cls, d: Dict[str, Any]) -> "DirectUrl": return DirectUrl( url=_get_required(d, str, "url"), subdirectory=_get(d, str, "subdirectory"), - info=_exactly_one_of( - [ - ArchiveInfo._from_dict(_get(d, dict, "archive_info")), - DirInfo._from_dict(_get(d, dict, "dir_info")), - VcsInfo._from_dict(_get(d, dict, "vcs_info")), - ] - ), + info=InfoType.from_dict(d), ) def to_dict(self) -> Dict[str, Any]: diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 6069b278b9b..a51f9598fd5 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -1,14 +1,19 @@ import functools +import itertools import logging import os import posixpath import re import urllib.parse -from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union +import xml.etree.ElementTree +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Tuple, Union +from pip._internal.models.direct_url import ArchiveInfo, DirectUrl from pip._internal.utils.filetypes import WHEEL_EXTENSION from pip._internal.utils.hashes import Hashes from pip._internal.utils.misc import ( + pairwise, redact_auth_from_url, split_auth_from_netloc, splitext, @@ -22,9 +27,131 @@ logger = logging.getLogger(__name__) +HTMLElement = xml.etree.ElementTree.Element + + _SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5") +@dataclass(frozen=True) +class LinkHash: + """Links to content may have embedded hash values. This class parses those. + + `name` must be any member of `_SUPPORTED_HASHES`. + + This class can be converted to and from `ArchiveInfo`. While ArchiveInfo intends to + be JSON-serializable to conform to PEP 610, this class contains the logic for + parsing a hash name and value for correctness, and then checking whether that hash + conforms to a schema with `.is_hash_allowed()`.""" + + name: str + value: str + + # TODO: consider beginning/ending this with \b? Otherwise we risk accepting invalid + # hashes such as "sha256=aa113592bbeg", since this pattern will just terminate the + # search at "aa113592bbe" and discount the "g". + # TODO: consider re.IGNORECASE? 
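+    # Matches e.g. "sha256=<hex digest>"; the digest must be lowercase hex.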
+ _hash_re = re.compile( + r"({choices})=([a-f0-9]+)".format( + choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES) + ) + ) + + def __post_init__(self) -> None: + assert self._hash_re.match(f"{self.name}={self.value}") + + @classmethod + @functools.lru_cache(maxsize=None) + def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]: + """Search a string for a checksum algorithm name and encoded output value.""" + match = cls._hash_re.search(url) + if match is None: + return None + name, value = match.groups() + return cls(name=name, value=value) + + def to_archive_info(self) -> ArchiveInfo: + """Convert to ArchiveInfo to form a DirectUrl instance (see PEP 610).""" + return ArchiveInfo(hash=f"{self.name}={self.value}") + + @classmethod + def from_archive_info(cls, info: ArchiveInfo) -> Optional["LinkHash"]: + """Parse an ArchiveInfo hash into a LinkHash instance.""" + if info.hash is None: + return None + return cls.split_hash_name_and_value(info.hash) + + def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool: + """ + Return True if the current hash is allowed by `hashes`. + """ + if hashes is None: + return False + return hashes.is_hash_allowed(self.name, hex_digest=self.value) + + +def _clean_url_path_part(part: str) -> str: + """ + Clean a "part" of a URL path (i.e. after splitting on "@" characters). + """ + # We unquote prior to quoting to make sure nothing is double quoted. + return urllib.parse.quote(urllib.parse.unquote(part)) + + +def _clean_file_url_path(part: str) -> str: + """ + Clean the first part of a URL path that corresponds to a local + filesystem path (i.e. the first part after splitting on "@" characters). + """ + # We unquote prior to quoting to make sure nothing is double quoted. + # Also, on Windows the path part might contain a drive letter which + # should not be quoted. On Linux where drive letters do not + # exist, the colon should be quoted. We rely on urllib.request + # to do the right thing here. + return urllib.request.pathname2url(urllib.request.url2pathname(part)) + + +# percent-encoded: / +_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) + + +def _clean_url_path(path: str, is_local_path: bool) -> str: + """ + Clean the path portion of a URL. + """ + if is_local_path: + clean_func = _clean_file_url_path + else: + clean_func = _clean_url_path_part + + # Split on the reserved characters prior to cleaning so that + # revision strings in VCS URLs are properly preserved. + parts = _reserved_chars_re.split(path) + + cleaned_parts = [] + for to_clean, reserved in pairwise(itertools.chain(parts, [""])): + cleaned_parts.append(clean_func(to_clean)) + # Normalize %xx escapes (e.g. %2f -> %2F) + cleaned_parts.append(reserved.upper()) + + return "".join(cleaned_parts) + + +def _ensure_quoted_url(url: str) -> str: + """ + Make sure a link is fully quoted. + For example, if ' ' occurs in the URL, it will be replaced with "%20", + and without double-quoting other characters. + """ + # Split the URL into parts according to the general structure + # `scheme://netloc/path;parameters?query#fragment`. + result = urllib.parse.urlparse(url) + # If the netloc is empty, then the URL refers to a local filesystem path. 
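+    # (e.g. "file:///tmp/pkg" parses with an empty netloc, while "https://host/pkg" does not.)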
+ is_local_path = not result.netloc + path = _clean_url_path(result.path, is_local_path=is_local_path) + return urllib.parse.urlunparse(result._replace(path=path)) + + class Link(KeyBasedCompareMixin): """Represents a parsed link from a Package Index's simple URL""" @@ -34,6 +161,7 @@ class Link(KeyBasedCompareMixin): "comes_from", "requires_python", "yanked_reason", + "dist_info_metadata", "cache_link_parsing", ] @@ -43,6 +171,7 @@ def __init__( comes_from: Optional[Union[str, "HTMLPage"]] = None, requires_python: Optional[str] = None, yanked_reason: Optional[str] = None, + dist_info_metadata: Optional[str] = None, cache_link_parsing: bool = True, ) -> None: """ @@ -59,6 +188,11 @@ def __init__( a simple repository HTML link. If the file has been yanked but no reason was provided, this should be the empty string. See PEP 592 for more information and the specification. + :param dist_info_metadata: the metadata attached to the file, or None if no such + metadata is provided. This is the value of the "data-dist-info-metadata" + attribute, if present, in a simple repository HTML link. This may be parsed + by `URLDownloadInfo.from_link_with_source()`. See PEP 658 for more + information and the specification. :param cache_link_parsing: A flag that is used elsewhere to determine whether resources retrieved from this link should be cached. PyPI index urls should @@ -78,11 +212,41 @@ def __init__( self.comes_from = comes_from self.requires_python = requires_python if requires_python else None self.yanked_reason = yanked_reason + self.dist_info_metadata = dist_info_metadata super().__init__(key=url, defining_class=Link) self.cache_link_parsing = cache_link_parsing + @classmethod + def from_element( + cls, + anchor_attribs: Dict[str, Optional[str]], + page_url: str, + base_url: str, + ) -> Optional["Link"]: + """ + Convert an anchor element's attributes in a simple repository page to a Link. 
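+
+        Returns None if the anchor has no href attribute.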
+ """ + href = anchor_attribs.get("href") + if not href: + return None + + url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href)) + pyrequire = anchor_attribs.get("data-requires-python") + yanked_reason = anchor_attribs.get("data-yanked") + dist_info_metadata = anchor_attribs.get("data-dist-info-metadata") + + link = Link( + url, + comes_from=page_url, + requires_python=pyrequire, + yanked_reason=yanked_reason, + dist_info_metadata=dist_info_metadata, + ) + + return link + def __str__(self) -> str: if self.requires_python: rp = f" (requires-python:{self.requires_python})" @@ -165,22 +329,21 @@ def subdirectory_fragment(self) -> Optional[str]: return None return match.group(1) - _hash_re = re.compile( - r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES)) - ) + def get_link_hash(self) -> Optional[LinkHash]: + return LinkHash.split_hash_name_and_value(self._url) @property def hash(self) -> Optional[str]: - match = self._hash_re.search(self._url) - if match: - return match.group(2) + link_hash = self.get_link_hash() + if link_hash is not None: + return link_hash.value return None @property def hash_name(self) -> Optional[str]: - match = self._hash_re.search(self._url) - if match: - return match.group(1) + link_hash = self.get_link_hash() + if link_hash is not None: + return link_hash.name return None @property @@ -210,19 +373,86 @@ def is_yanked(self) -> bool: @property def has_hash(self) -> bool: - return self.hash_name is not None + return self.get_link_hash() is not None def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool: """ - Return True if the link has a hash and it is allowed. + Return True if the link has a hash and it is allowed by `hashes`. """ - if hashes is None or not self.has_hash: + link_hash = self.get_link_hash() + if link_hash is None: return False - # Assert non-None so mypy knows self.hash_name and self.hash are str. - assert self.hash_name is not None - assert self.hash is not None + return link_hash.is_hash_allowed(hashes) + + +@dataclass(frozen=True) +class LinkWithSource: + """Retain a `Link` instance along with all the additional information necessary to + construct a `DirectUrl` instance with `direct_url_from_link`.""" + + link: Link + source_dir: Optional[str] = None + link_is_in_wheel_cache: bool = False + + +@dataclass(frozen=True) +class URLDownloadInfo: + """Retain a `DirectUrl` instance for a `Link` and for any metadata about it. + + The metadata would typically be parsed from a simple PyPI repository supporting + PEP 658.""" + + direct_url: DirectUrl + dist_info_metadata: Optional[DirectUrl] + + @classmethod + def from_link_with_source( + cls, + link_with_source: LinkWithSource, + ) -> "URLDownloadInfo": + """Parse a `DirectUrl` instance from a `Link` and any metadata.""" + from pip._internal.utils.direct_url_helpers import direct_url_from_link + + link = link_with_source.link + + # Implementation of PEP 658 parsing. Note that Link.from_element() parsing the + # "data-dist-info-metadata" attribute from an HTML anchor tag is typically how + # the Link.dist_info_metadata attribute gets set. + dist_info_metadata: Optional[DirectUrl] = None + if link.dist_info_metadata is not None: + metadata_url = f"{link.url_without_fragment}.metadata" + metadata_hash: Optional[ArchiveInfo] = None + # If data-dist-info-metadata="true" is set, then the metadata file exists, + # but there is no information about its checksum or anything else. 
+ if link.dist_info_metadata != "true": + link_hash = LinkHash.split_hash_name_and_value(link.dist_info_metadata) + if link_hash is not None: + metadata_hash = link_hash.to_archive_info() + + dist_info_metadata = DirectUrl( + url=metadata_url, + info=metadata_hash or ArchiveInfo(hash=None), + subdirectory=link.subdirectory_fragment, + ) - return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash) + return cls( + direct_url=direct_url_from_link( + link, + source_dir=link_with_source.source_dir, + link_is_in_wheel_cache=link_with_source.link_is_in_wheel_cache, + ), + dist_info_metadata=dist_info_metadata, + ) + + def to_dict(self) -> Dict[str, Any]: + """Take advantage of `DirectUrl.to_dict()` to produce a JSON-serializable + representation.""" + return { + "direct_url": self.direct_url.to_dict(), + "dist_info_metadata": ( + self.dist_info_metadata and self.dist_info_metadata.to_dict() + ), + } class _CleanResult(NamedTuple): diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index b40d9e251f8..06eb0bca8c2 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -62,6 +62,21 @@ logger = logging.getLogger(__name__) +def produce_exact_version_specifier(version: str) -> SpecifierSet: + if isinstance(parse_version(version), Version): + op = "==" + else: + op = "===" + + return SpecifierSet(f"{op}{version}") + + +def produce_exact_version_requirement(name: str, version: str) -> Requirement: + specifier = produce_exact_version_specifier(version) + + return Requirement(f"{name}{specifier}") + + class InstallRequirement: """ Represents something that may be installed later on, may have information @@ -350,20 +365,10 @@ def _set_requirement(self) -> None: assert self.metadata is not None assert self.source_dir is not None - # Construct a Requirement object from the generated metadata - if isinstance(parse_version(self.metadata["Version"]), Version): - op = "==" - else: - op = "===" - - self.req = Requirement( - "".join( - [ - self.metadata["Name"], - op, - self.metadata["Version"], - ] - ) + # Construct a Requirement object from the generated metadata. + self.req = produce_exact_version_requirement( + self.metadata["Name"], + self.metadata["Version"], ) def warn_on_mismatching_name(self) -> None: diff --git a/src/pip/_internal/resolution/base.py b/src/pip/_internal/resolution/base.py index 42dade18c1e..283dfb5ed71 100644 --- a/src/pip/_internal/resolution/base.py +++ b/src/pip/_internal/resolution/base.py @@ -1,20 +1,49 @@ -from typing import Callable, List, Optional +import abc +from typing import TYPE_CHECKING, Callable, List, Optional, cast + +from pip._vendor.packaging.utils import NormalizedName from pip._internal.req.req_install import InstallRequirement from pip._internal.req.req_set import RequirementSet +if TYPE_CHECKING: + from pip._vendor.resolvelib.resolvers import Result as RLResult + + from .resolvelib.base import Candidate, Requirement + + Result = RLResult[Requirement, Candidate, str] + InstallRequirementProvider = Callable[ [str, Optional[InstallRequirement]], InstallRequirement ] -class BaseResolver: +# Avoid conflicting with the PyPI package "Python". +REQUIRES_PYTHON_IDENTIFIER = cast(NormalizedName, "") +# Avoid clashing with any package on PyPI, but remain parseable as a Requirement. This +# should only be used for .as_serializable_requirement(). 
+REQUIRES_PYTHON_SERIALIZABLE_IDENTIFIER = cast(NormalizedName, "Requires-Python") + + +class RequirementSetWithCandidates(RequirementSet): + def __init__( + self, + candidates: "Result", + check_supported_wheels: bool = True, + ) -> None: + self.candidates = candidates + super().__init__(check_supported_wheels=check_supported_wheels) + + +class BaseResolver(metaclass=abc.ABCMeta): + @abc.abstractmethod def resolve( self, root_reqs: List[InstallRequirement], check_supported_wheels: bool ) -> RequirementSet: - raise NotImplementedError() + ... + @abc.abstractmethod def get_installation_order( self, req_set: RequirementSet ) -> List[InstallRequirement]: - raise NotImplementedError() + ... diff --git a/src/pip/_internal/resolution/resolvelib/base.py b/src/pip/_internal/resolution/resolvelib/base.py index b206692a0a9..f8657e1eed6 100644 --- a/src/pip/_internal/resolution/resolvelib/base.py +++ b/src/pip/_internal/resolution/resolvelib/base.py @@ -1,5 +1,7 @@ +import abc from typing import FrozenSet, Iterable, Optional, Tuple, Union +from pip._vendor.packaging.requirements import Requirement as PkgRequirement from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.packaging.version import LegacyVersion, Version @@ -59,8 +61,8 @@ def is_satisfied_by(self, candidate: "Candidate") -> bool: return self.specifier.contains(candidate.version, prereleases=True) -class Requirement: - @property +class Requirement(metaclass=abc.ABCMeta): + @abc.abstractproperty def project_name(self) -> NormalizedName: """The "project name" of a requirement. @@ -68,25 +70,29 @@ def project_name(self) -> NormalizedName: in which case ``name`` would contain the ``[...]`` part, while this refers to the name of the project. """ - raise NotImplementedError("Subclass should override") - @property + @abc.abstractproperty def name(self) -> str: """The name identifying this requirement in the resolver. This is different from ``project_name`` if this requirement contains extras, where ``project_name`` would not contain the ``[...]`` part. """ - raise NotImplementedError("Subclass should override") def is_satisfied_by(self, candidate: "Candidate") -> bool: return False + @abc.abstractmethod def get_candidate_lookup(self) -> CandidateLookup: - raise NotImplementedError("Subclass should override") + ... + @abc.abstractmethod def format_for_error(self) -> str: - raise NotImplementedError("Subclass should override") + ... + + @abc.abstractmethod + def as_serializable_requirement(self) -> Optional[PkgRequirement]: + ... def _match_link(link: Link, candidate: "Candidate") -> bool: @@ -95,8 +101,8 @@ def _match_link(link: Link, candidate: "Candidate") -> bool: return False -class Candidate: - @property +class Candidate(metaclass=abc.ABCMeta): + @abc.abstractproperty def project_name(self) -> NormalizedName: """The "project name" of the candidate. @@ -104,38 +110,43 @@ def project_name(self) -> NormalizedName: in which case ``name`` would contain the ``[...]`` part, while this refers to the name of the project. """ - raise NotImplementedError("Override in subclass") - @property + @abc.abstractproperty def name(self) -> str: """The name identifying this candidate in the resolver. This is different from ``project_name`` if this candidate contains extras, where ``project_name`` would not contain the ``[...]`` part. 
""" - raise NotImplementedError("Override in subclass") - @property + @abc.abstractproperty def version(self) -> CandidateVersion: - raise NotImplementedError("Override in subclass") + ... + + @abc.abstractmethod + def as_serializable_requirement(self) -> PkgRequirement: + ... - @property + @abc.abstractproperty def is_installed(self) -> bool: - raise NotImplementedError("Override in subclass") + ... - @property + @abc.abstractproperty def is_editable(self) -> bool: - raise NotImplementedError("Override in subclass") + ... - @property + @abc.abstractproperty def source_link(self) -> Optional[Link]: - raise NotImplementedError("Override in subclass") + ... + @abc.abstractmethod def iter_dependencies(self, with_requires: bool) -> Iterable[Optional[Requirement]]: - raise NotImplementedError("Override in subclass") + ... + @abc.abstractmethod def get_install_requirement(self) -> Optional[InstallRequirement]: - raise NotImplementedError("Override in subclass") + ... + @abc.abstractmethod def format_for_error(self) -> str: - raise NotImplementedError("Subclass should override") + ... diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py index d1470ecbf4e..52a7e3f575f 100644 --- a/src/pip/_internal/resolution/resolvelib/candidates.py +++ b/src/pip/_internal/resolution/resolvelib/candidates.py @@ -1,7 +1,8 @@ import logging import sys -from typing import TYPE_CHECKING, Any, FrozenSet, Iterable, Optional, Tuple, Union, cast +from typing import TYPE_CHECKING, Any, FrozenSet, Iterable, Optional, Tuple, Union +from pip._vendor.packaging.requirements import Requirement as PkgRequirement from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.packaging.version import Version @@ -17,7 +18,11 @@ install_req_from_editable, install_req_from_line, ) -from pip._internal.req.req_install import InstallRequirement +from pip._internal.req.req_install import ( + InstallRequirement, + produce_exact_version_requirement, +) +from pip._internal.resolution.base import REQUIRES_PYTHON_IDENTIFIER from pip._internal.utils.misc import normalize_version_info from .base import Candidate, CandidateVersion, Requirement, format_name @@ -33,9 +38,6 @@ "LinkCandidate", ] -# Avoid conflicting with the PyPI package "Python". 
-REQUIRES_PYTHON_IDENTIFIER = cast(NormalizedName, "") - def as_base_candidate(candidate: Candidate) -> Optional[BaseCandidate]: """The runtime version of BaseCandidate.""" @@ -163,6 +165,9 @@ def __init__( def __str__(self) -> str: return f"{self.name} {self.version}" + def as_serializable_requirement(self) -> PkgRequirement: + return produce_exact_version_requirement(self.name, str(self.version)) + def __repr__(self) -> str: return "{class_name}({link!r})".format( class_name=self.__class__.__name__, @@ -376,6 +381,9 @@ def name(self) -> str: def version(self) -> CandidateVersion: return self.dist.version + def as_serializable_requirement(self) -> PkgRequirement: + return self.dist.as_serializable_requirement() + @property def is_editable(self) -> bool: return self.dist.editable @@ -458,6 +466,9 @@ def name(self) -> str: def version(self) -> CandidateVersion: return self.base.version + def as_serializable_requirement(self) -> PkgRequirement: + return self.base.as_serializable_requirement() + def format_for_error(self) -> str: return "{} [{}]".format( self.base.format_for_error(), ", ".join(sorted(self.extras)) @@ -540,6 +551,13 @@ def name(self) -> str: def version(self) -> CandidateVersion: return self._version + def as_serializable_requirement(self) -> PkgRequirement: + raise NotImplementedError() + + @property + def is_editable(self) -> bool: + return False + def format_for_error(self) -> str: return f"Python {self.version}" diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py index e6ec9594f62..23988bf2712 100644 --- a/src/pip/_internal/resolution/resolvelib/provider.py +++ b/src/pip/_internal/resolution/resolvelib/provider.py @@ -13,8 +13,9 @@ from pip._vendor.resolvelib.providers import AbstractProvider +from pip._internal.resolution.base import REQUIRES_PYTHON_IDENTIFIER + from .base import Candidate, Constraint, Requirement -from .candidates import REQUIRES_PYTHON_IDENTIFIER from .factory import Factory if TYPE_CHECKING: diff --git a/src/pip/_internal/resolution/resolvelib/reporter.py b/src/pip/_internal/resolution/resolvelib/reporter.py index 6ced5329b81..7739b6d84a0 100644 --- a/src/pip/_internal/resolution/resolvelib/reporter.py +++ b/src/pip/_internal/resolution/resolvelib/reporter.py @@ -1,9 +1,27 @@ from collections import defaultdict +from dataclasses import dataclass, field from logging import getLogger -from typing import Any, DefaultDict +from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Tuple +from pip._vendor.packaging.requirements import Requirement as PkgRequirement +from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.resolvelib.reporters import BaseReporter +from pip._internal.models.link import LinkWithSource, URLDownloadInfo +from pip._internal.req.req_install import ( + InstallRequirement, + produce_exact_version_specifier, +) +from pip._internal.resolution.base import RequirementSetWithCandidates +from pip._internal.resolution.resolvelib.candidates import ( + LinkCandidate, + RequiresPythonCandidate, +) +from pip._internal.resolution.resolvelib.requirements import ( + ExplicitRequirement, + RequiresPythonRequirement, +) + from .base import Candidate, Requirement logger = getLogger(__name__) @@ -66,3 +84,173 @@ def backtracking(self, candidate: Candidate) -> None: def pinning(self, candidate: Candidate) -> None: logger.info("Reporter.pinning(%r)", candidate) + + +@dataclass(frozen=True) +class ResolvedCandidate: + """Coalesce all the information pip's 
resolver retains about an + installation candidate.""" + + req: PkgRequirement + download_info: URLDownloadInfo + dependencies: Tuple[PkgRequirement, ...] + requires_python: Optional[SpecifierSet] + + def to_dict(self) -> Dict[str, Any]: + """Return a JSON-serializable representation of this install candidate.""" + return { + "requirement": str(self.req), + "download_info": self.download_info.to_dict(), + "dependencies": {dep.name: str(dep) for dep in self.dependencies}, + "requires_python": str(self.requires_python) + if self.requires_python + else None, + } + + +@dataclass +class ResolutionResult: + """The inputs and outputs of a pip internal resolve process.""" + + input_requirements: Tuple[str, ...] + python_version: Optional[SpecifierSet] = None + candidates: Dict[str, ResolvedCandidate] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + """Return a JSON-serializable representation of the resolve process.""" + return { + "experimental": True, + "input_requirements": [str(req) for req in self.input_requirements], + "python_version": str(self.python_version), + "candidates": { + name: info.to_dict() for name, info in self.candidates.items() + }, + } + + @classmethod + def _extract_hashable_resolve_input( + cls, + reqs: Iterable[InstallRequirement], + ) -> Tuple[str, ...]: + """Reconstruct the input requirements provided to the resolve. + + In theory, pip should be able to be re-run with these arguments to get the same + resolve output. Because pip can accept URLs as well as parseable requirement + strings on the command line, this method returns a list of strings instead of + `PkgRequirement` instances. + + These strings are sorted so that they can be hashed and compared efficiently. + """ + input_requirements: List[str] = [] + for ireq in reqs: + if ireq.req: + # If the initial requirement string contained a url (retained in + # InstallRequirement.link), add it back to the requirement string + # included in the JSON report. + if ireq.link: + req_string = f"{ireq.req}@{ireq.link.url}" + else: + req_string = str(ireq.req) + else: + # If the InstallRequirement has no Requirement information, don't + # produce a Requirement string, but simply reproduce the URL. + assert ireq.link + req_string = ireq.link.url + + input_requirements.append(req_string) + + return tuple(sorted(input_requirements)) + + @classmethod + def generate_resolve_report( + cls, + input_requirements: Iterable[InstallRequirement], + resolved_requirement_set: RequirementSetWithCandidates, + ) -> "ResolutionResult": + """Process the resolve to obtain a JSON-serializable/pretty-printable report.""" + hashable_input = cls._extract_hashable_resolve_input(input_requirements) + resolution_result = cls(input_requirements=hashable_input) + + # (1) Scan all the install candidates from `.candidates`. + for candidate in resolved_requirement_set.candidates.mapping.values(): + + # (2) Map each install candidate back to a specific install requirement from + # `.requirements`. + req = resolved_requirement_set.requirements.get(candidate.name, None) + if req is None: + # Pip will impose an implicit `Requires-Python` constraint upon the + # whole resolve corresponding to the value of the `--python-version` + # argument. This shows up as an installation candidate which does not + # correspond to any requirement from the requirement set. + if isinstance(candidate, RequiresPythonCandidate): + # This candidate should only appear once. 
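+                    # The sentinel version is then recorded as an exact
+                    # specifier. As an illustrative sketch (assumed behavior of
+                    # the helper, not quoted from it):
+                    #   produce_exact_version_specifier("3.10.1")
+                    #   -> SpecifierSet("==3.10.1")
+                    # which is why the emitted report carries a field like
+                    #   "python_version": "==3.10.1"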
+ assert resolution_result.python_version is None + # Generate a serializable `SpecifierSet` instance. + resolution_result.python_version = produce_exact_version_specifier( + str(candidate.version) + ) + continue + + # All other types of installation candidates are expected to be found + # within the resolved requirement set. + raise TypeError( + f"unknown candidate not found in requirement set: {candidate}" + ) + assert req.name is not None + assert req.link is not None + # Each project name should only be fulfilled by a single + # installation candidate. + assert req.name not in resolution_result.candidates + + # (3) Scan the dependencies of the installation candidates, which cover both + # normal dependencies as well as Requires-Python information. + requires_python: Optional[SpecifierSet] = None + dependencies: List[PkgRequirement] = [] + for maybe_dep in candidate.iter_dependencies(with_requires=True): + # It's unclear why `.iter_dependencies()` may occasionally yield `None`. + if maybe_dep is None: + continue + + # There will only ever be one python version constraint for each + # candidate, if any. We extract the version specifier. + if isinstance(maybe_dep, RequiresPythonRequirement): + requires_python = maybe_dep.specifier + continue + + # Convert the 2020 resolver-internal Requirement subclass instance into + # a `packaging.requirements.Requirement` instance. + maybe_req = maybe_dep.as_serializable_requirement() + if maybe_req is None: + continue + + # For `ExplicitRequirement`s only, we want to make sure we propagate any + # source URL into a dependency's `packaging.requirements.Requirement` + # instance. + if isinstance(maybe_dep, ExplicitRequirement): + dep_candidate = maybe_dep.candidate + if maybe_req.url is None and isinstance( + dep_candidate, LinkCandidate + ): + assert dep_candidate.source_link is not None + maybe_req = PkgRequirement( + f"{maybe_req}@{dep_candidate.source_link.url}" + ) + + dependencies.append(maybe_req) + + # Mutate the candidates dictionary to add this candidate after processing + # any dependencies and python version requirement. 
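+            # Sketch of one resulting entry, as later rendered by to_dict()
+            # (the values here are hypothetical, not produced by this code):
+            #   {
+            #       "requirement": "idna==3.3",
+            #       "download_info": {"direct_url": {...}, "dist_info_metadata": None},
+            #       "dependencies": {},
+            #       "requires_python": ">=3.5",
+            #   }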
+ resolution_result.candidates[req.name] = ResolvedCandidate( + req=candidate.as_serializable_requirement(), + download_info=URLDownloadInfo.from_link_with_source( + LinkWithSource( + req.link, + source_dir=req.source_dir, + link_is_in_wheel_cache=req.original_link_is_in_wheel_cache, + ) + ), + dependencies=tuple(dependencies), + requires_python=requires_python, + ) + + return resolution_result diff --git a/src/pip/_internal/resolution/resolvelib/requirements.py b/src/pip/_internal/resolution/resolvelib/requirements.py index f561f1f1e27..816d5a5709c 100644 --- a/src/pip/_internal/resolution/resolvelib/requirements.py +++ b/src/pip/_internal/resolution/resolvelib/requirements.py @@ -1,7 +1,11 @@ +from typing import Optional + +from pip._vendor.packaging.requirements import Requirement as PkgRequirement from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._internal.req.req_install import InstallRequirement +from pip._internal.resolution.base import REQUIRES_PYTHON_SERIALIZABLE_IDENTIFIER from .base import Candidate, CandidateLookup, Requirement, format_name @@ -29,6 +33,9 @@ def name(self) -> str: # No need to canonicalize - the candidate did this return self.candidate.name + def as_serializable_requirement(self) -> PkgRequirement: + return self.candidate.as_serializable_requirement() + def format_for_error(self) -> str: return self.candidate.format_for_error() @@ -77,6 +84,9 @@ def format_for_error(self) -> str: return ", ".join(parts[:-1]) + " and " + parts[-1] + def as_serializable_requirement(self) -> Optional[PkgRequirement]: + return self._ireq.req + def get_candidate_lookup(self) -> CandidateLookup: return None, self._ireq @@ -120,6 +130,11 @@ def name(self) -> str: def format_for_error(self) -> str: return str(self) + def as_serializable_requirement(self) -> PkgRequirement: + return PkgRequirement( + f"{REQUIRES_PYTHON_SERIALIZABLE_IDENTIFIER}{self.specifier}", + ) + def get_candidate_lookup(self) -> CandidateLookup: if self.specifier.contains(self._candidate.version, prereleases=True): return self._candidate, None @@ -159,6 +174,9 @@ def name(self) -> str: def format_for_error(self) -> str: return str(self) + def as_serializable_requirement(self) -> Optional[PkgRequirement]: + raise NotImplementedError() + def get_candidate_lookup(self) -> CandidateLookup: return None, None diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index 32ef7899ba6..6b8ed47033c 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -13,7 +13,11 @@ from pip._internal.operations.prepare import RequirementPreparer from pip._internal.req.req_install import InstallRequirement from pip._internal.req.req_set import RequirementSet -from pip._internal.resolution.base import BaseResolver, InstallRequirementProvider +from pip._internal.resolution.base import ( + BaseResolver, + InstallRequirementProvider, + RequirementSetWithCandidates, +) from pip._internal.resolution.resolvelib.provider import PipProvider from pip._internal.resolution.resolvelib.reporter import ( PipDebuggingReporter, @@ -45,6 +49,7 @@ def __init__( ignore_dependencies: bool, ignore_installed: bool, ignore_requires_python: bool, + dry_run: bool, force_reinstall: bool, upgrade_strategy: str, suppress_build_failures: bool, @@ -66,12 +71,20 @@ def __init__( py_version_info=py_version_info, ) self.ignore_dependencies = 
ignore_dependencies + # TODO: for performance, try to decouple extracting sdist metadata from + # actually building the sdist. See https://github.com/pypa/pip/issues/8929. + # As mentioned in that issue, PEP 658 support on PyPI would address many cases, + # but it would drastically improve performance for many existing packages if we + # attempted to extract PKG-INFO or .egg-info from non-wheel files, falling back + # to the slower setup.py invocation if not found. LazyZipOverHTTP and + # MemoryWheel already implement such a hack for wheel files specifically. + self.dry_run = dry_run self.upgrade_strategy = upgrade_strategy self._result: Optional[Result] = None def resolve( self, root_reqs: List[InstallRequirement], check_supported_wheels: bool - ) -> RequirementSet: + ) -> RequirementSetWithCandidates: collected = self.factory.collect_root_requirements(root_reqs) provider = PipProvider( factory=self.factory, @@ -102,7 +115,9 @@ def resolve( ) raise error from e - req_set = RequirementSet(check_supported_wheels=check_supported_wheels) + req_set = RequirementSetWithCandidates( + candidates=result, check_supported_wheels=check_supported_wheels + ) for candidate in result.mapping.values(): ireq = candidate.get_install_requirement() if ireq is None: @@ -159,8 +174,10 @@ def resolve( req_set.add_named_requirement(ireq) - reqs = req_set.all_requirements - self.factory.preparer.prepare_linked_requirements_more(reqs) + if not self.dry_run: + reqs = req_set.all_requirements + self.factory.preparer.prepare_linked_requirements_more(reqs) + return req_set def get_installation_order( diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py index ace2ff74c5b..7551248321e 100644 --- a/tests/functional/test_download.py +++ b/tests/functional/test_download.py @@ -1,12 +1,22 @@ +import json import os.path import shutil import textwrap +import uuid from hashlib import sha256 -from typing import List +from typing import Any, Callable, Dict, List, Optional, Tuple import pytest +from pip._vendor.packaging.requirements import Requirement from pip._internal.cli.status_codes import ERROR +from pip._internal.models.direct_url import ( + ArchiveInfo, + DirectUrl, + DirInfo, + InfoType, + VcsInfo, +) from pip._internal.utils.urls import path_to_url from tests.conftest import MockServer, ScriptFactory from tests.lib import PipTestEnvironment, TestData, create_really_basic_wheel @@ -90,6 +100,17 @@ def test_basic_download_should_download_dependencies( result.did_not_create(script.site_packages / "openid") +@pytest.mark.network +def test_dry_run_should_not_download_dependencies( + script: PipTestEnvironment, +) -> None: + """ + It should not download dependencies into the scratch path. 
+ """ + result = script.pip("download", "--dry-run", "Paste[openid]==1.7.5.1", "-d", ".") + result.did_not_create(Path("scratch") / "Paste-1.7.5.1.tar.gz") + + def test_download_wheel_archive(script: PipTestEnvironment, data: TestData) -> None: """ It should download a wheel archive path @@ -1163,3 +1184,284 @@ def test_download_editable( downloads = os.listdir(download_dir) assert len(downloads) == 1 assert downloads[0].endswith(".zip") + + +@pytest.fixture(scope="function") +def json_report( + shared_script: PipTestEnvironment, tmpdir: Path +) -> Callable[..., Dict[str, Any]]: + """Execute `pip download --report` and parse the JSON file it writes out.""" + download_dir = tmpdir / "report" + download_dir.mkdir() + downloaded_path = download_dir / "report.json" + + def execute_pip_for_report_json(*args: str) -> Dict[str, Any]: + shared_script.pip( + "download", + "--dry-run", + f"--report={downloaded_path}", + *args, + ) + + assert downloaded_path.exists() + + with open(downloaded_path, "r") as f: + report = json.load(f) + + return report + + return execute_pip_for_report_json + + +@pytest.mark.network +@pytest.mark.parametrize( + "package_name, package_filename, requirement, url_no_fragment, info", + [ + ("simple", "simple-1.0.tar.gz", "simple==1.0", None, ArchiveInfo(hash=None)), + ( + "simplewheel", + "simplewheel-1.0-py2.py3-none-any.whl", + "simplewheel==1.0", + None, + ArchiveInfo(hash=None), + ), + ( + "pip-test-package", + "git+https://github.com/pypa/pip-test-package.git", + "pip-test-package==0.1.1", + "https://github.com/pypa/pip-test-package.git", + VcsInfo(vcs="git", commit_id="5547fa909e83df8bd743d3978d6667497983a4b7"), + ), + ("symlinks", "symlinks", "symlinks==0.1.dev0", None, DirInfo(editable=False)), + ( + "pex", + "https://files.pythonhosted.org/packages/6f/7f/6b1e56fc291df523a02769ebe9b432f63f294475012c2c1f76d4cbb5321f/pex-2.1.61-py2.py3-none-any.whl#sha256=c09fda0f0477f3894f7a7a464b7e4c03d44734de46caddd25291565eed32a882", # noqa: E501 + "pex==2.1.61", + "https://files.pythonhosted.org/packages/6f/7f/6b1e56fc291df523a02769ebe9b432f63f294475012c2c1f76d4cbb5321f/pex-2.1.61-py2.py3-none-any.whl", # noqa: E501 + ArchiveInfo( + hash="sha256=c09fda0f0477f3894f7a7a464b7e4c03d44734de46caddd25291565eed32a882" # noqa: E501 + ), + ), + ], +) +def test_download_report_direct_url_top_level( + json_report: Callable[..., Dict[str, Any]], + shared_data: TestData, + package_name: str, + package_filename: str, + requirement: str, + url_no_fragment: Optional[str], + info: InfoType, +) -> None: + """Test `pip download --report`'s "download_info" JSON field.""" + # If we are not referring to an explicit URL in our test parameterization, assume we + # are referring to one of our test packages. + if "://" in package_filename: + simple_pkg = package_filename + else: + simple_pkg = path_to_url(str(shared_data.packages / package_filename)) + + report = json_report("--no-index", simple_pkg) + + assert len(report["input_requirements"]) == 1 + # Wheel file paths provided as inputs will be converted into an equivalent + # Requirement string 'a==x.y@scheme://path/to/wheel' instead of just the wheel path. 
+ assert report["input_requirements"][0].endswith(simple_pkg) + + candidate = report["candidates"][package_name] + assert requirement == candidate["requirement"] + direct_url = DirectUrl.from_dict(candidate["download_info"]["direct_url"]) + assert direct_url == DirectUrl( + url_no_fragment or simple_pkg, + info=info, + ) + + +@pytest.mark.network +def test_download_report_dependencies( + json_report: Callable[..., Dict[str, Any]], +) -> None: + """Test the result of a pinned resolve against PyPI.""" + report = json_report("cryptography==36.0.1", "cffi==1.15.0", "pycparser==2.21") + assert sorted(report["input_requirements"]) == [ + "cffi==1.15.0", + "cryptography==36.0.1", + "pycparser==2.21", + ] + + cryptography = report["candidates"]["cryptography"] + assert cryptography["requirement"] == "cryptography==36.0.1" + assert cryptography["requires_python"] == ">=3.6" + assert cryptography["dependencies"] == {"cffi": "cffi>=1.12"} + + cffi = report["candidates"]["cffi"] + assert cffi["requirement"] == "cffi==1.15.0" + assert cffi["requires_python"] is None + assert cffi["dependencies"] == {"pycparser": "pycparser"} + + pycparser = report["candidates"]["pycparser"] + assert pycparser["requirement"] == "pycparser==2.21" + assert pycparser["dependencies"] == {} + assert pycparser["requires_python"] == "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + + +@pytest.mark.network +@pytest.mark.parametrize( + "python_version", + [ + "3.10.0", + "3.10.1", + "3.7.0", + "3.8.0", + "3.9.0", + ], +) +def test_download_report_python_version( + json_report: Callable[..., Dict[str, Any]], + python_version: str, +) -> None: + """Ensure the --python-version variable is respected in the --report JSON output.""" + report = json_report( + f"--python-version={python_version}", "--only-binary=:all:", "wheel" + ) + assert report["python_version"] == f"=={python_version}" + + +@pytest.fixture(scope="function") +def index_html_content(tmpdir: Path) -> Callable[..., Path]: + """Generate a PyPI package index.html within a temporary local directory.""" + html_dir = tmpdir / "index_html_content" + html_dir.mkdir() + + def generate_index_html_subdir(index_html: str) -> Path: + """Create a new subdirectory after a UUID and write an index.html.""" + new_subdir = html_dir / uuid.uuid4().hex + new_subdir.mkdir() + + with open(new_subdir / "index.html", "w") as f: + f.write(index_html) + + return new_subdir + + return generate_index_html_subdir + + +@pytest.fixture(scope="function") +def json_report_for_index_content( + shared_data: TestData, + index_html_content: Callable[..., Path], + json_report: Callable[..., Dict[str, Any]], +) -> Callable[..., Dict[str, Any]]: + """Generate a PyPI package index within a local directory pointing to test data.""" + + def generate_index_and_report_for_some_packages( + packages: Dict[str, List[Tuple[str, str]]], *args: str + ) -> Dict[str, Any]: + """ + Produce a PyPI directory structure pointing to a subset of packages in + test data, then execute `pip download --report ... -i ...` pointing to our + generated index. + """ + # (1) Generate the content for a PyPI index.html. + pkg_links = "\n".join( + f' {pkg}' for pkg in packages.keys() + ) + index_html = f"""\ + + + + + Simple index + + +{pkg_links} + +""" + # (2) Generate the index.html in a new subdirectory of the temp directory. + index_html_subdir = index_html_content(index_html) + + # (3) Generate subdirectories for individual packages, each with their own + # index.html. 
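+        # The anchors written below follow PEP 503, optionally carrying the
+        # PEP 658 `data-dist-info-metadata` attribute. A sketch of one
+        # generated link (assuming the simple-3.0 entry defined further down):
+        #   <a href="simple-3.0.tar.gz"
+        #      data-dist-info-metadata="sha256=aabe42af">simple-3.0.tar.gz</a><br/>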
+        for pkg, links in packages.items():
+            pkg_subdir = index_html_subdir / pkg
+            pkg_subdir.mkdir()
+
+            download_links: List[str] = []
+            for relative_path, additional_tag in links:
+                # For each link to be added to the generated index.html for this
+                # package, copy over the corresponding file in `shared_data.packages`.
+                download_links.append(
+                    f'    <a href="{relative_path}" {additional_tag}>{relative_path}</a><br/>'  # noqa: E501
+                )
+                shutil.copy(
+                    shared_data.packages / relative_path, pkg_subdir / relative_path
+                )
+
+            # After collating all the download links and copying over the files, write
+            # an index.html with the generated download links for each copied file.
+            download_links_str = "\n".join(download_links)
+            pkg_index_content = f"""\
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta name="pypi:repository-version" content="1.0">
+    <title>Links for {pkg}</title>
+  </head>
+  <body>
+    <h1>Links for {pkg}</h1>
+{download_links_str} + +""" + with open(pkg_subdir / "index.html", "w") as f: + f.write(pkg_index_content) + + return json_report("-i", path_to_url(index_html_subdir), *args) + + return generate_index_and_report_for_some_packages + + +_simple_packages: Dict[str, List[Tuple[str, str]]] = { + "simple": [ + ("simple-1.0.tar.gz", ""), + ("simple-2.0.tar.gz", 'data-dist-info-metadata="true"'), + ("simple-3.0.tar.gz", 'data-dist-info-metadata="sha256=aabe42af"'), + ] +} + + +@pytest.mark.parametrize( + "requirement_to_download, dist_info_metadata", + [ + ( + "simple==1.0", + None, + ), + ( + "simple==2.0", + ArchiveInfo(hash=None), + ), + ( + "simple==3.0", + ArchiveInfo(hash="sha256=aabe42af"), + ), + ], +) +def test_download_report_dist_info_metadata( + json_report_for_index_content: Callable[..., Dict[str, Any]], + requirement_to_download: str, + dist_info_metadata: Optional[ArchiveInfo], +) -> None: + """Ensure `pip download --report` reflects PEP 658 metadata.""" + report = json_report_for_index_content( + _simple_packages, + requirement_to_download, + ) + project_name = Requirement(requirement_to_download).name + direct_url_json = report["candidates"][project_name]["download_info"][ + "dist_info_metadata" + ] + if dist_info_metadata is None: + assert direct_url_json is None + else: + direct_url = DirectUrl.from_dict(direct_url_json) + assert direct_url.info == dist_info_metadata diff --git a/tests/functional/test_freeze.py b/tests/functional/test_freeze.py index bae9eadbd30..83d1f959ee6 100644 --- a/tests/functional/test_freeze.py +++ b/tests/functional/test_freeze.py @@ -1,3 +1,4 @@ +import dataclasses import os import re import sys @@ -1015,7 +1016,13 @@ def test_freeze_pep610_editable(script: PipTestEnvironment) -> None: with open(direct_url_path) as f: direct_url = DirectUrl.from_json(f.read()) assert isinstance(direct_url.info, DirInfo) - direct_url.info.editable = True + direct_url = dataclasses.replace( + direct_url, + info=dataclasses.replace( + direct_url.info, + editable=True, + ), + ) with open(direct_url_path, "w") as f: f.write(direct_url.to_json()) result = script.pip("freeze") diff --git a/tests/functional/test_list.py b/tests/functional/test_list.py index b9d0f0fa340..be6ff5f0d9a 100644 --- a/tests/functional/test_list.py +++ b/tests/functional/test_list.py @@ -1,3 +1,4 @@ +import dataclasses import json import os @@ -744,7 +745,9 @@ def test_list_pep610_editable(script: PipTestEnvironment) -> None: with open(direct_url_path) as f: direct_url = DirectUrl.from_json(f.read()) assert isinstance(direct_url.info, DirInfo) - direct_url.info.editable = True + direct_url = dataclasses.replace( + direct_url, info=dataclasses.replace(direct_url.info, editable=True) + ) with open(direct_url_path, "w") as f: f.write(direct_url.to_json()) result = script.pip("list", "--format=json") diff --git a/tests/lib/server.py b/tests/lib/server.py index 95cc6a23e34..39da62ca36b 100644 --- a/tests/lib/server.py +++ b/tests/lib/server.py @@ -150,14 +150,6 @@ def html5_page(text: str) -> str: ) -def index_page(spec: Dict[str, str]) -> "WSGIApplication": - def link(name: str, value: str) -> str: - return '{}'.format(value, name) - - links = "".join(link(*kv) for kv in spec.items()) - return text_html_response(html5_page(links)) - - def package_page(spec: Dict[str, str]) -> "WSGIApplication": def link(name: str, value: str) -> str: return '{}'.format(value, name) diff --git a/tests/unit/resolution_resolvelib/test_resolver.py b/tests/unit/resolution_resolvelib/test_resolver.py index 
db71f911acd..50816c1e526 100644 --- a/tests/unit/resolution_resolvelib/test_resolver.py +++ b/tests/unit/resolution_resolvelib/test_resolver.py @@ -28,6 +28,7 @@ def resolver(preparer: RequirementPreparer, finder: PackageFinder) -> Resolver: ignore_installed=False, ignore_requires_python=False, force_reinstall=False, + dry_run=True, upgrade_strategy="to-satisfy-only", suppress_build_failures=False, ) diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index f77794b55b9..e0d7277403d 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -2,6 +2,7 @@ import logging import os.path import re +import urllib.parse import urllib.request import uuid from textwrap import dedent @@ -10,13 +11,12 @@ import pytest from pip._vendor import html5lib, requests +from pip._vendor.packaging.requirements import Requirement from pip._internal.exceptions import NetworkConnectionError from pip._internal.index.collector import ( HTMLPage, LinkCollector, - _clean_link, - _clean_url_path, _determine_base_url, _get_html_page, _get_html_response, @@ -27,13 +27,35 @@ ) from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource from pip._internal.models.candidate import InstallationCandidate +from pip._internal.models.direct_url import ArchiveInfo, DirectUrl from pip._internal.models.index import PyPI -from pip._internal.models.link import Link +from pip._internal.models.link import ( + Link, + LinkHash, + LinkWithSource, + URLDownloadInfo, + _clean_url_path, +) from pip._internal.network.session import PipSession from tests.lib import TestData, make_test_link_collector from tests.lib.path import Path +def _clean_link(url: str) -> str: + """ + Make sure a link is fully quoted. + For example, if ' ' occurs in the URL, it will be replaced with "%20", + and without double-quoting other characters. + """ + # Split the URL into parts according to the general structure + # `scheme://netloc/path;parameters?query#fragment`. + result = urllib.parse.urlparse(url) + # If the netloc is empty, then the URL refers to a local filesystem path. + is_local_path = not result.netloc + path = _clean_url_path(result.path, is_local_path=is_local_path) + return urllib.parse.urlunparse(result._replace(path=path)) + + @pytest.mark.parametrize( "url", [ @@ -420,7 +442,7 @@ def test_clean_link(url: str, clean_url: str) -> None: def _test_parse_links_data_attribute( anchor_html: str, attr: str, expected: Optional[str] -) -> None: +) -> Link: html = ( "" '' @@ -438,6 +460,7 @@ def _test_parse_links_data_attribute( (link,) = links actual = getattr(link, attr) assert actual == expected + return link @pytest.mark.parametrize( @@ -494,6 +517,78 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) - _test_parse_links_data_attribute(anchor_html, "yanked_reason", expected) +# Requirement objects do not == each other unless they point to the same instance! +_pkg1_requirement = Requirement("pkg1==1.0") + + +@pytest.mark.parametrize( + "anchor_html, expected, download_info", + [ + # Test not present. + ( + '', + None, + URLDownloadInfo( + DirectUrl( + "https://example.com/pkg1-1.0.tar.gz", ArchiveInfo(hash=None) + ), + None, + ), + ), + # Test with value "true". + ( + '', + "true", + URLDownloadInfo( + DirectUrl( + "https://example.com/pkg1-1.0.tar.gz", ArchiveInfo(hash=None) + ), + DirectUrl( + url="https://example.com/pkg1-1.0.tar.gz.metadata", + info=ArchiveInfo(hash=None), + ), + ), + ), + # Test with a provided hash value. 
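+        # A representative anchor for this case (a reconstructed sketch, not
+        # the verbatim test input) would be:
+        #   <a href="https://example.com/pkg1-1.0.tar.gz"
+        #      data-dist-info-metadata="sha256=aa113592bbe">pkg1-1.0.tar.gz</a>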
+ ( + '', # noqa: E501 + "sha256=aa113592bbe", + URLDownloadInfo( + DirectUrl( + "https://example.com/pkg1-1.0.tar.gz", ArchiveInfo(hash=None) + ), + DirectUrl( + url="https://example.com/pkg1-1.0.tar.gz.metadata", + info=ArchiveInfo(hash="sha256=aa113592bbe"), + ), + ), + ), + # Test with a provided hash value for both the requirement as well as metadata. + ( + '', # noqa: E501 + "sha256=aa113592bbe", + URLDownloadInfo( + DirectUrl( + "https://example.com/pkg1-1.0.tar.gz", + ArchiveInfo(hash="sha512=abc132409cb"), + ), + DirectUrl( + url="https://example.com/pkg1-1.0.tar.gz.metadata", + info=ArchiveInfo(hash="sha256=aa113592bbe"), + ), + ), + ), + ], +) +def test_parse_links__dist_info_metadata( + anchor_html: str, + expected: Optional[str], + download_info: URLDownloadInfo, +) -> None: + link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected) + assert URLDownloadInfo.from_link_with_source(LinkWithSource(link)) == download_info + + def test_parse_links_caches_same_page_by_url() -> None: html = ( "" @@ -933,3 +1028,56 @@ def expand_path(path: str) -> str: expected_temp2_dir = os.path.normcase(temp2_dir) assert search_scope.find_links == ["~/temp1", expected_temp2_dir] assert search_scope.index_urls == ["default_url"] + + +@pytest.mark.parametrize( + "url, result", + [ + ( + "https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe", + LinkHash("sha256", "aa113592bbe"), + ), + ( + "https://pypi.org/pip-18.0.tar.gz#md5=aa113592bbe", + LinkHash("md5", "aa113592bbe"), + ), + ("https://pypi.org/pip-18.0.tar.gz#sha256=gaa113592bbe", None), + ("https://pypi.org/pip-18.0.tar.gz", None), + ("https://pypi.org/pip-18.0.tar.gz#sha500=aa113592bbe", None), + ], +) +def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None: + assert LinkHash.split_hash_name_and_value(url) == result + + +@pytest.mark.parametrize( + "archive_info, link_hash", + [ + ( + ArchiveInfo(hash=None), + None, + ), + ( + ArchiveInfo(hash="sha256=aabe42af"), + LinkHash(name="sha256", value="aabe42af"), + ), + # Test invalid hash strings, which ArchiveInfo doesn't validate. + ( + # Invalid hash name. + ArchiveInfo(hash="sha500=aabe42af"), + None, + ), + ( + # Invalid hash value. 
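+            # ("g" is not a hexadecimal digit, so this digest cannot be valid;
+            # LinkHash.from_archive_info is expected to return None, not raise.)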
+            ArchiveInfo(hash="sha256=g42afbe"),
+            None,
+        ),
+    ],
+)
+def test_link_hash_archive_info_fungibility(
+    archive_info: ArchiveInfo,
+    link_hash: Optional[LinkHash],
+) -> None:
+    assert LinkHash.from_archive_info(archive_info) == link_hash
+    if link_hash is not None:
+        assert link_hash.to_archive_info() == archive_info
diff --git a/tests/unit/test_direct_url_helpers.py b/tests/unit/test_direct_url_helpers.py
index 8d94aeb50b6..08af0b01fd2 100644
--- a/tests/unit/test_direct_url_helpers.py
+++ b/tests/unit/test_direct_url_helpers.py
@@ -1,3 +1,4 @@
+import dataclasses
 from functools import partial
 from unittest import mock

@@ -22,14 +23,20 @@ def test_as_pep440_requirement_archive() -> None:
         direct_url_as_pep440_direct_reference(direct_url, "pkg")
         == "pkg @ file:///home/user/archive.tgz"
     )
-    direct_url.subdirectory = "subdir"
+    direct_url = dataclasses.replace(direct_url, subdirectory="subdir")
     direct_url.validate()
     assert (
         direct_url_as_pep440_direct_reference(direct_url, "pkg")
         == "pkg @ file:///home/user/archive.tgz#subdirectory=subdir"
     )
     assert isinstance(direct_url.info, ArchiveInfo)
-    direct_url.info.hash = "sha1=1b8c5bc61a86f377fea47b4276c8c8a5842d2220"
+    assert direct_url.info.hash is None
+    direct_url = dataclasses.replace(
+        direct_url,
+        info=dataclasses.replace(
+            direct_url.info, hash="sha1=1b8c5bc61a86f377fea47b4276c8c8a5842d2220"
+        ),
+    )
     direct_url.validate()
     assert (
         direct_url_as_pep440_direct_reference(direct_url, "pkg")
@@ -76,7 +83,7 @@ def test_as_pep440_requirement_vcs() -> None:
         == "pkg @ git+https://g.c/u/p.git"
         "@1b8c5bc61a86f377fea47b4276c8c8a5842d2220"
     )
-    direct_url.subdirectory = "subdir"
+    direct_url = dataclasses.replace(direct_url, subdirectory="subdir")
     direct_url.validate()
     assert (
         direct_url_as_pep440_direct_reference(direct_url, "pkg")