Skip to content

Commit c43cda6

Browse files
implement --report
- create LinkWithSource to retain attrs from InstallRequirement
- add tests for report output for top-level requirements
- add tests for more of the report JSON format
- add passing tests for JSON report output including PEP 658!
- add docstrings to several classes and functions, including tests!
- move the --report implementation into resolvelib
- use an abstract base class instead of a Union for InfoType
- use frozen dataclasses for InfoType subclasses
1 parent 9c8dfa6 commit c43cda6

20 files changed

+1106 −241 lines changed

src/pip/_internal/commands/download.py

+41-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import logging
23
import os
34
from optparse import Values
@@ -7,7 +8,10 @@
78
from pip._internal.cli.cmdoptions import make_target_python
89
from pip._internal.cli.req_command import RequirementCommand, with_cleanup
910
from pip._internal.cli.status_codes import SUCCESS
11+
from pip._internal.exceptions import CommandError
1012
from pip._internal.req.req_tracker import get_requirement_tracker
13+
from pip._internal.resolution.base import RequirementSetWithCandidates
14+
from pip._internal.resolution.resolvelib.reporter import ResolutionResult
1115
from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
1216
from pip._internal.utils.temp_dir import TempDirectory
1317

@@ -66,7 +70,22 @@ def add_options(self) -> None:
6670
"--dry-run",
6771
dest="dry_run",
6872
action="store_true",
69-
help="Avoid actually downloading wheels.",
73+
help=(
74+
"Avoid actually downloading wheels or sdists. "
75+
"Intended to be used with --report."
76+
),
77+
)
78+
79+
self.cmd_opts.add_option(
80+
"--report",
81+
"--resolution-report",
82+
dest="json_report_file",
83+
metavar="file",
84+
default=None,
85+
help=(
86+
"Print a JSON object representing the resolve into <file>. "
87+
"Often used with --dry-run."
88+
),
7089
)
7190

7291
cmdoptions.add_target_python_options(self.cmd_opts)
@@ -145,4 +164,25 @@ def run(self, options: Values, args: List[str]) -> int:
145164
if downloaded:
146165
write_output("Successfully downloaded %s", " ".join(downloaded))
147166

167+
# The rest of this method pertains to generating the ResolutionReport with
168+
# --report.
169+
if not options.json_report_file:
170+
return SUCCESS
171+
if not isinstance(requirement_set, RequirementSetWithCandidates):
172+
raise CommandError(
173+
"The legacy resolver is being used via "
174+
"--use-deprecated=legacy-resolver. "
175+
"The legacy resolver does not retain detailed dependency information, "
176+
"so `pip download --report` cannot be used with it. "
177+
)
178+
179+
resolution_result = ResolutionResult.generate_resolve_report(
180+
reqs, requirement_set
181+
)
182+
183+
# Write the full report data to the JSON output file.
184+
with open(options.json_report_file, "w") as f:
185+
json.dump(resolution_result.to_dict(), f, indent=4)
186+
write_output(f"JSON report written to '{options.json_report_file}'.")
187+
148188
return SUCCESS

src/pip/_internal/index/collector.py

+3-98
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@
88
import itertools
99
import logging
1010
import os
11-
import re
1211
import urllib.parse
1312
import urllib.request
14-
import xml.etree.ElementTree
1513
from optparse import Values
1614
from typing import (
1715
Callable,
@@ -29,19 +27,18 @@
2927
from pip._vendor.requests.exceptions import RetryError, SSLError
3028

3129
from pip._internal.exceptions import NetworkConnectionError
32-
from pip._internal.models.link import Link
30+
from pip._internal.models.link import HTMLElement, Link
3331
from pip._internal.models.search_scope import SearchScope
3432
from pip._internal.network.session import PipSession
3533
from pip._internal.network.utils import raise_for_status
3634
from pip._internal.utils.filetypes import is_archive_file
37-
from pip._internal.utils.misc import pairwise, redact_auth_from_url
35+
from pip._internal.utils.misc import redact_auth_from_url
3836
from pip._internal.vcs import vcs
3937

4038
from .sources import CandidatesFromPage, LinkSource, build_source
4139

4240
logger = logging.getLogger(__name__)
4341

44-
HTMLElement = xml.etree.ElementTree.Element
4542
ResponseHeaders = MutableMapping[str, str]
4643

4744

@@ -171,94 +168,6 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str:
171168
return page_url
172169

173170

174-
def _clean_url_path_part(part: str) -> str:
175-
"""
176-
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
177-
"""
178-
# We unquote prior to quoting to make sure nothing is double quoted.
179-
return urllib.parse.quote(urllib.parse.unquote(part))
180-
181-
182-
def _clean_file_url_path(part: str) -> str:
183-
"""
184-
Clean the first part of a URL path that corresponds to a local
185-
filesystem path (i.e. the first part after splitting on "@" characters).
186-
"""
187-
# We unquote prior to quoting to make sure nothing is double quoted.
188-
# Also, on Windows the path part might contain a drive letter which
189-
# should not be quoted. On Linux where drive letters do not
190-
# exist, the colon should be quoted. We rely on urllib.request
191-
# to do the right thing here.
192-
return urllib.request.pathname2url(urllib.request.url2pathname(part))
193-
194-
195-
# percent-encoded: /
196-
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
197-
198-
199-
def _clean_url_path(path: str, is_local_path: bool) -> str:
200-
"""
201-
Clean the path portion of a URL.
202-
"""
203-
if is_local_path:
204-
clean_func = _clean_file_url_path
205-
else:
206-
clean_func = _clean_url_path_part
207-
208-
# Split on the reserved characters prior to cleaning so that
209-
# revision strings in VCS URLs are properly preserved.
210-
parts = _reserved_chars_re.split(path)
211-
212-
cleaned_parts = []
213-
for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
214-
cleaned_parts.append(clean_func(to_clean))
215-
# Normalize %xx escapes (e.g. %2f -> %2F)
216-
cleaned_parts.append(reserved.upper())
217-
218-
return "".join(cleaned_parts)
219-
220-
221-
def _clean_link(url: str) -> str:
222-
"""
223-
Make sure a link is fully quoted.
224-
For example, if ' ' occurs in the URL, it will be replaced with "%20",
225-
and without double-quoting other characters.
226-
"""
227-
# Split the URL into parts according to the general structure
228-
# `scheme://netloc/path;parameters?query#fragment`.
229-
result = urllib.parse.urlparse(url)
230-
# If the netloc is empty, then the URL refers to a local filesystem path.
231-
is_local_path = not result.netloc
232-
path = _clean_url_path(result.path, is_local_path=is_local_path)
233-
return urllib.parse.urlunparse(result._replace(path=path))
234-
235-
236-
def _create_link_from_element(
237-
anchor: HTMLElement,
238-
page_url: str,
239-
base_url: str,
240-
) -> Optional[Link]:
241-
"""
242-
Convert an anchor element in a simple repository page to a Link.
243-
"""
244-
href = anchor.get("href")
245-
if not href:
246-
return None
247-
248-
url = _clean_link(urllib.parse.urljoin(base_url, href))
249-
pyrequire = anchor.get("data-requires-python")
250-
yanked_reason = anchor.get("data-yanked")
251-
252-
link = Link(
253-
url,
254-
comes_from=page_url,
255-
requires_python=pyrequire,
256-
yanked_reason=yanked_reason,
257-
)
258-
259-
return link
260-
261-
262171
class CacheablePageContent:
263172
def __init__(self, page: "HTMLPage") -> None:
264173
assert page.cache_link_parsing
@@ -307,11 +216,7 @@ def parse_links(page: "HTMLPage") -> Iterable[Link]:
307216
url = page.url
308217
base_url = _determine_base_url(document, url)
309218
for anchor in document.findall(".//a"):
310-
link = _create_link_from_element(
311-
anchor,
312-
page_url=url,
313-
base_url=base_url,
314-
)
219+
link = Link.from_element(anchor, page_url=url, base_url=base_url)
315220
if link is None:
316221
continue
317222
yield link

src/pip/_internal/metadata/base.py

+3
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ def __repr__(self) -> str:
101101
def __str__(self) -> str:
102102
return f"{self.raw_name} {self.version}"
103103

104+
def as_serializable_requirement(self) -> Requirement:
105+
raise NotImplementedError()
106+
104107
@property
105108
def location(self) -> Optional[str]:
106109
"""Where the distribution is loaded from.

src/pip/_internal/metadata/pkg_resources.py

+3
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ def from_wheel(cls, wheel: Wheel, name: str) -> "Distribution":
120120
)
121121
return cls(dist)
122122

123+
def as_serializable_requirement(self) -> Requirement:
124+
return self._dist.as_requirement()
125+
123126
@property
124127
def location(self) -> Optional[str]:
125128
return self._dist.location

src/pip/_internal/models/direct_url.py

+57-56
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
""" PEP 610 """
2+
import abc
23
import json
34
import re
45
import urllib.parse
5-
from typing import Any, Dict, Iterable, Optional, Type, TypeVar, Union
6+
from dataclasses import dataclass
7+
from typing import Any, ClassVar, Dict, Iterable, Optional, Type, TypeVar
68

79
__all__ = [
810
"DirectUrl",
@@ -47,8 +49,39 @@ def _get_required(
4749
return value
4850

4951

50-
def _exactly_one_of(infos: Iterable[Optional["InfoType"]]) -> "InfoType":
51-
infos = [info for info in infos if info is not None]
52+
def _filter_none(**kwargs: Any) -> Dict[str, Any]:
53+
"""Make dict excluding None values."""
54+
return {k: v for k, v in kwargs.items() if v is not None}
55+
56+
57+
class InfoType(metaclass=abc.ABCMeta):
58+
"""Superclass for the types of metadata that can be stored within a "direct URL"."""
59+
60+
name: ClassVar[str]
61+
62+
@classmethod
63+
@abc.abstractmethod
64+
def _from_dict(cls: Type[T], d: Optional[Dict[str, Any]]) -> Optional[T]:
65+
"""Parse an instance of this class from a JSON-serializable dict."""
66+
67+
@abc.abstractmethod
68+
def _to_dict(self) -> Dict[str, Any]:
69+
"""Produce a JSON-serializable dict which can be parsed with `._from_dict()`."""
70+
71+
@classmethod
72+
def from_dict(cls, d: Dict[str, Any]) -> "InfoType":
73+
"""Parse exactly one of the known subclasses from the dict `d`."""
74+
return _exactly_one_of(
75+
[
76+
ArchiveInfo._from_dict(_get(d, dict, "archive_info")),
77+
DirInfo._from_dict(_get(d, dict, "dir_info")),
78+
VcsInfo._from_dict(_get(d, dict, "vcs_info")),
79+
]
80+
)
81+
82+
83+
def _exactly_one_of(infos: Iterable[Optional[InfoType]]) -> InfoType:
84+
infos = list(filter(None, infos))
5285
if not infos:
5386
raise DirectUrlValidationError(
5487
"missing one of archive_info, dir_info, vcs_info"
@@ -61,27 +94,15 @@ def _exactly_one_of(infos: Iterable[Optional["InfoType"]]) -> "InfoType":
6194
return infos[0]
6295

6396

64-
def _filter_none(**kwargs: Any) -> Dict[str, Any]:
65-
"""Make dict excluding None values."""
66-
return {k: v for k, v in kwargs.items() if v is not None}
67-
68-
69-
class VcsInfo:
70-
name = "vcs_info"
97+
@dataclass(frozen=True)
98+
class VcsInfo(InfoType):
99+
vcs: str
100+
commit_id: str
101+
requested_revision: Optional[str] = None
102+
resolved_revision: Optional[str] = None
103+
resolved_revision_type: Optional[str] = None
71104

72-
def __init__(
73-
self,
74-
vcs: str,
75-
commit_id: str,
76-
requested_revision: Optional[str] = None,
77-
resolved_revision: Optional[str] = None,
78-
resolved_revision_type: Optional[str] = None,
79-
) -> None:
80-
self.vcs = vcs
81-
self.requested_revision = requested_revision
82-
self.commit_id = commit_id
83-
self.resolved_revision = resolved_revision
84-
self.resolved_revision_type = resolved_revision_type
105+
name: ClassVar[str] = "vcs_info"
85106

86107
@classmethod
87108
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["VcsInfo"]:
@@ -105,14 +126,11 @@ def _to_dict(self) -> Dict[str, Any]:
105126
)
106127

107128

108-
class ArchiveInfo:
109-
name = "archive_info"
129+
@dataclass(frozen=True)
130+
class ArchiveInfo(InfoType):
131+
hash: Optional[str] = None
110132

111-
def __init__(
112-
self,
113-
hash: Optional[str] = None,
114-
) -> None:
115-
self.hash = hash
133+
name: ClassVar[str] = "archive_info"
116134

117135
@classmethod
118136
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["ArchiveInfo"]:
@@ -124,14 +142,11 @@ def _to_dict(self) -> Dict[str, Any]:
124142
return _filter_none(hash=self.hash)
125143

126144

127-
class DirInfo:
128-
name = "dir_info"
145+
@dataclass(frozen=True)
146+
class DirInfo(InfoType):
147+
editable: bool = False
129148

130-
def __init__(
131-
self,
132-
editable: bool = False,
133-
) -> None:
134-
self.editable = editable
149+
name: ClassVar[str] = "dir_info"
135150

136151
@classmethod
137152
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["DirInfo"]:
@@ -143,19 +158,11 @@ def _to_dict(self) -> Dict[str, Any]:
143158
return _filter_none(editable=self.editable or None)
144159

145160

146-
InfoType = Union[ArchiveInfo, DirInfo, VcsInfo]
147-
148-
161+
@dataclass(frozen=True)
149162
class DirectUrl:
150-
def __init__(
151-
self,
152-
url: str,
153-
info: InfoType,
154-
subdirectory: Optional[str] = None,
155-
) -> None:
156-
self.url = url
157-
self.info = info
158-
self.subdirectory = subdirectory
163+
url: str
164+
info: InfoType
165+
subdirectory: Optional[str] = None
159166

160167
def _remove_auth_from_netloc(self, netloc: str) -> str:
161168
if "@" not in netloc:
@@ -192,13 +199,7 @@ def from_dict(cls, d: Dict[str, Any]) -> "DirectUrl":
192199
return DirectUrl(
193200
url=_get_required(d, str, "url"),
194201
subdirectory=_get(d, str, "subdirectory"),
195-
info=_exactly_one_of(
196-
[
197-
ArchiveInfo._from_dict(_get(d, dict, "archive_info")),
198-
DirInfo._from_dict(_get(d, dict, "dir_info")),
199-
VcsInfo._from_dict(_get(d, dict, "vcs_info")),
200-
]
201-
),
202+
info=InfoType.from_dict(d),
202203
)
203204

204205
def to_dict(self) -> Dict[str, Any]:

0 commit comments

Comments (0)