From 4b7dd8e9f07c99831ebde1f929014350ddfaeb62 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Fri, 13 Dec 2019 17:45:47 -0500 Subject: [PATCH 1/2] Do not cleanup download tempdir immediately The previous logic forced us to handle populating the download directory in this function right next to the download and hash checking. By extending the lifetime of the directory we can more easily separate the code. This also allows for additional optimizations later: by using metadata from wheels directly instead of unpacking them, we can avoid extracting wheels unnecessarily. Unpacked files can be easily 3x larger than the archives themselves, so this should reduce disk utilization and general IO significantly. --- src/pip/_internal/operations/prepare.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 1e1d8fc3e4e..82d486970ea 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -144,7 +144,8 @@ def unpack_http_url( hashes=None, # type: Optional[Hashes] ): # type: (...) -> None - with TempDirectory(kind="unpack") as temp_dir: + temp_dir = TempDirectory(kind="unpack", globally_managed=True) + if True: # If a download dir is specified, is the file already downloaded there? already_downloaded_path = None if download_dir: From 0d23d1226dd4bffcf530f4f0b356879800d126ae Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Fri, 13 Dec 2019 18:03:56 -0500 Subject: [PATCH 2/2] Dedent unconditional block --- src/pip/_internal/operations/prepare.py | 45 ++++++++++++------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 82d486970ea..47c8717830e 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -145,32 +145,31 @@ def unpack_http_url( ): # type: (...) -> None temp_dir = TempDirectory(kind="unpack", globally_managed=True) - if True: - # If a download dir is specified, is the file already downloaded there? - already_downloaded_path = None - if download_dir: - already_downloaded_path = _check_download_dir( - link, download_dir, hashes - ) + # If a download dir is specified, is the file already downloaded there? + already_downloaded_path = None + if download_dir: + already_downloaded_path = _check_download_dir( + link, download_dir, hashes + ) - if already_downloaded_path: - from_path = already_downloaded_path - content_type = mimetypes.guess_type(from_path)[0] - else: - # let's download to a tmp dir - from_path, content_type = _download_http_url( - link, downloader, temp_dir.path, hashes - ) + if already_downloaded_path: + from_path = already_downloaded_path + content_type = mimetypes.guess_type(from_path)[0] + else: + # let's download to a tmp dir + from_path, content_type = _download_http_url( + link, downloader, temp_dir.path, hashes + ) - # unpack the archive to the build dir location. even when only - # downloading archives, they have to be unpacked to parse dependencies - unpack_file(from_path, location, content_type) + # unpack the archive to the build dir location. even when only + # downloading archives, they have to be unpacked to parse dependencies + unpack_file(from_path, location, content_type) - # a download dir is specified; let's copy the archive there - if download_dir and not os.path.exists( - os.path.join(download_dir, link.filename) - ): - _copy_file(from_path, download_dir, link) + # a download dir is specified; let's copy the archive there + if download_dir and not os.path.exists( + os.path.join(download_dir, link.filename) + ): + _copy_file(from_path, download_dir, link) def _copy2_ignoring_special_files(src, dest):