Skip to content

Commit 1950393

Browse files
committed
New take on retrying order asset downloads
Wraps the entire orders.download_asset() in a retry, eliminating the double download and concentrating the retry complexity instead of spreading it over several modules. Resolves #1010
1 parent 39ff354 commit 1950393

File tree

3 files changed

+53
-33
lines changed

3 files changed

+53
-33
lines changed

planet/clients/orders.py

Lines changed: 50 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,21 @@
1414
# the License.
1515
"""Functionality for interacting with the orders api"""
1616
import asyncio
17+
import hashlib
18+
import json
1719
import logging
18-
from pathlib import Path
20+
import re
1921
import time
20-
from typing import AsyncIterator, Callable, List, Optional
2122
import uuid
22-
import json
23-
import hashlib
23+
from pathlib import Path
24+
from typing import AsyncIterator, Callable, List, Optional
2425

26+
import stamina
2527
from tqdm.asyncio import tqdm
2628

2729
from .. import exceptions
2830
from ..constants import PLANET_BASE_URL
29-
from ..http import Session
31+
from ..http import RETRY_EXCEPTIONS, Session
3032
from ..models import Paged
3133

3234
BASE_URL = f'{PLANET_BASE_URL}/compute/ops'
@@ -232,6 +234,7 @@ async def aggregated_order_stats(self) -> dict:
232234
response = await self._session.request(method='GET', url=url)
233235
return response.json()
234236

237+
@stamina.retry(on=tuple(RETRY_EXCEPTIONS))
235238
async def download_asset(self,
236239
location: str,
237240
filename: Optional[str] = None,
@@ -257,31 +260,49 @@ async def download_asset(self,
257260
limit is exceeded.
258261
259262
"""
260-
response = await self._session.request(method='GET', url=location)
261-
filename = filename or response.filename
262-
length = response.length
263-
if not filename:
264-
raise exceptions.ClientError(
265-
f'Could not determine filename at {location}')
263+
async with self._session._limiter, self._session._client.stream('GET', location) as resp:
264+
content_length = int(resp.headers.get('content-length'))
265+
266+
# Fall back to content-disposition for a filename.
267+
if not filename:
268+
try:
269+
content_disposition = resp.headers['content-disposition']
270+
match = re.search('filename="?([^"]+)"?',
271+
content_disposition)
272+
filename = match.group(1) # type: ignore
273+
except (AttributeError, KeyError) as err:
274+
raise exceptions.ClientError(
275+
f'Could not determine filename at {location}') from err
276+
277+
dl_path = Path(directory, filename)
278+
dl_path.parent.mkdir(exist_ok=True, parents=True)
279+
LOGGER.info(f'Downloading {dl_path}')
266280

267-
dl_path = Path(directory, filename)
268-
dl_path.parent.mkdir(exist_ok=True, parents=True)
269-
LOGGER.info(f'Downloading {dl_path}')
270-
271-
try:
272-
mode = 'wb' if overwrite else 'xb'
273-
with open(dl_path, mode) as fp:
274-
with tqdm(total=length,
275-
unit_scale=True,
276-
unit_divisor=1024 * 1024,
277-
unit='B',
278-
desc=str(filename),
279-
disable=not progress_bar) as progress:
280-
await self._session.write(location, fp, progress.update)
281-
except FileExistsError:
282-
LOGGER.info(f'File {dl_path} exists, not overwriting')
283-
284-
return dl_path
281+
try:
282+
mode = 'wb' if overwrite else 'xb'
283+
with dl_path.open(mode) as fp:
284+
with tqdm(total=content_length,
285+
unit_scale=True,
286+
unit_divisor=1024 * 1024,
287+
unit='B',
288+
desc=str(filename),
289+
disable=not progress_bar) as progress:
290+
291+
previous = resp.num_bytes_downloaded
292+
293+
# Size from boto3.s3.transfer.TransferConfig
294+
# io_chunksize. Planet assets are generally
295+
# several MB or more.
296+
async for chunk in resp.aiter_bytes(chunk_size=262144):
297+
fp.write(chunk)
298+
current = resp.num_bytes_downloaded
299+
progress.update(current - previous)
300+
previous = current
301+
302+
except FileExistsError:
303+
LOGGER.info(f'File {dl_path} exists, not overwriting')
304+
305+
return dl_path
285306

286307
async def download_order(self,
287308
order_id: str,

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
'httpx>=0.23.0',
3030
'jsonschema',
3131
'pyjwt>=2.1',
32+
'stamina',
3233
'tqdm>=4.56',
3334
'typing-extensions',
3435
]

tests/integration/test_orders_api.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ async def test_download_asset_md(tmpdir, session):
566566

567567
@respx.mock
568568
@pytest.mark.anyio
569-
async def test_download_asset_img(tmpdir, open_test_img, session):
569+
async def test_download_asset_img_with_retry(tmpdir, open_test_img, session):
570570
dl_url = TEST_DOWNLOAD_URL + '/1?token=IAmAToken'
571571

572572
img_headers = {
@@ -587,9 +587,7 @@ async def _stream_img():
587587
# an error caused by respx and not this code
588588
# https://github.com/lundberg/respx/issues/130
589589
respx.get(dl_url).side_effect = [
590-
httpx.Response(HTTPStatus.OK,
591-
headers=img_headers,
592-
request='donotcloneme'),
590+
httpx.ReadError("no can do!"),
593591
httpx.Response(HTTPStatus.OK,
594592
stream=_stream_img(),
595593
headers=img_headers,

0 commit comments

Comments
 (0)