Skip to content

Commit 90703cc

Browse files
calculate byte lengths with a HEAD request
1 parent e0dab39 commit 90703cc

File tree

1 file changed

+37
-8
lines changed

1 file changed

+37
-8
lines changed

src/pip/_internal/network/download.py

+37-8
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pathlib import Path
99
from queue import Queue
1010
from threading import Event, Semaphore, Thread
11-
from typing import Iterable, Optional, Tuple, Union
11+
from typing import Iterable, List, Optional, Tuple, Union
1212

1313
from pip._vendor.requests.models import Response
1414

@@ -127,6 +127,15 @@ def _http_get_download(session: PipSession, link: Link) -> Response:
127127
return resp
128128

129129

130+
def _http_content_length(session: PipSession, link: Link) -> Optional[int]:
    """Return the byte length the server advertises for link, if any.

    Sends a HEAD request for the link's URL (with any "#fragment" stripped)
    and parses the Content-Length response header.

    Returns None when the header is missing, empty, or not a non-negative
    integer. Error responses are reported through raise_for_status().
    """
    target_url = link.url.split("#", 1)[0]
    resp = session.head(
        target_url,
        # Ask for the unencoded representation so the advertised length is
        # the size of the file itself rather than of a compressed transfer.
        headers={"Accept-Encoding": "identity"},
        # requests does not follow redirects for HEAD unless asked to;
        # without this, the Content-Length of a 3xx response would be
        # reported instead of the length of the actual file.
        allow_redirects=True,
    )
    raise_for_status(resp)
    content_length = resp.headers.get("content-length")
    if not content_length:
        return None
    try:
        length = int(content_length)
    except ValueError:
        # The length is only advisory, so a malformed header is treated the
        # same as a missing one rather than failing the caller.
        return None
    return length if length >= 0 else None
137+
138+
130139
class Downloader:
131140
def __init__(
132141
self,
@@ -164,8 +173,9 @@ def _copy_chunks(
164173
semaphore: Semaphore,
165174
session: PipSession,
166175
location: Path,
167-
link: Link,
176+
download_info: Tuple[Link, Optional[int]],
168177
) -> None:
178+
link, total_length = download_info
169179
with semaphore:
170180
try:
171181
try:
@@ -188,11 +198,19 @@ def _copy_chunks(
188198

189199
with filepath.open("wb") as output_file:
190200
chunk_index = 0
201+
current_bytes = 0
191202
for chunk in chunks:
192203
# Check if another thread exited with an exception between chunks.
193204
if event.is_set():
194205
return
195-
logger.debug("reading chunk %d for link %s", chunk_index, link)
206+
current_bytes += len(chunk)
207+
logger.debug(
208+
"reading chunk %d for file %s [%d/%s bytes]",
209+
chunk_index,
210+
filename,
211+
current_bytes,
212+
str(total_length or 0),
213+
)
196214
chunk_index += 1
197215
# Copy chunk directly to output file, without any
198216
# additional buffering.
@@ -214,13 +232,24 @@ def __init__(
214232
logger.info("Ignoring progress bar %s for parallel downloads", progress_bar)
215233

216234
def __call__(
217-
self, input_links: Iterable[Link], location: Path
235+
self, links: Iterable[Link], location: Path
218236
) -> Iterable[Tuple[Link, Tuple[Path, Optional[str]]]]:
219237
"""Download the files given by links into location."""
220-
links = list(input_links)
238+
# Calculate the byte length for each file, if available.
239+
links_with_lengths: List[Tuple[Link, Optional[int]]] = [
240+
(link, _http_content_length(self._session, link)) for link in links
241+
]
242+
# Sum up the total length we'll be downloading.
243+
total_length: Optional[int] = 0
244+
for _link, maybe_len in links_with_lengths:
245+
if maybe_len is None:
246+
total_length = None
247+
break
248+
assert total_length is not None
249+
total_length += maybe_len
221250

222251
# Set up state to track thread progress, including inner exceptions.
223-
total_downloads: int = len(links)
252+
total_downloads: int = len(links_with_lengths)
224253
completed_downloads: int = 0
225254
q: "Queue[Union[Tuple[Link, Path, Optional[str]], BaseException]]" = Queue()
226255
event = Event()
@@ -239,9 +268,9 @@ def __call__(
239268
workers = [
240269
Thread(
241270
target=_copy_chunks,
242-
args=(q, event, semaphore, self._session, location, link),
271+
args=(q, event, semaphore, self._session, location, download_info),
243272
)
244-
for link in links
273+
for download_info in links_with_lengths
245274
]
246275
for w in workers:
247276
w.start()

0 commit comments

Comments
 (0)