Skip to content

Commit 2eec820

Browse files
committed
Encode download url, fix tests and style updates.
1 parent 04740f1 commit 2eec820

File tree

7 files changed

+73
-61
lines changed

7 files changed

+73
-61
lines changed

mfr/core/exceptions.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,8 @@ def __init__(self, message, *args, metadata_url: str='', response: str='', **kwa
147147

148148

149149
class QueryParameterError(ProviderError):
150-
"""The MFR related errors raised from a :class:`mfr.core.provider` and relating to query parameters
151-
should inherit from MetadataError
152-
This error is thrown when a query parameter is used missused
150+
"""The MFR related errors raised from a :class:`mfr.core.provider`and relating to query
151+
parameters. This error is thrown when the query has an invalid value.
153152
"""
154153

155154
__TYPE = 'query_parameter'

mfr/core/provider.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import abc
2-
import markupsafe
32

3+
from aiohttp import HttpBadRequest
44
import furl
5+
import markupsafe
56

6-
from mfr.core import exceptions
7-
from mfr.server import settings
7+
from mfr.core import exceptions as core_exceptions
88
from mfr.core.metrics import MetricsRecord
9+
from mfr.server import settings as server_settings
910

1011

1112
class BaseProvider(metaclass=abc.ABCMeta):
@@ -17,12 +18,13 @@ class BaseProvider(metaclass=abc.ABCMeta):
1718
def __init__(self, request, url):
1819
self.request = request
1920
url_netloc = furl.furl(url).netloc
20-
if url_netloc not in settings.ALLOWED_PROVIDER_NETLOCS:
21-
raise exceptions.ProviderError(
21+
if url_netloc not in server_settings.ALLOWED_PROVIDER_NETLOCS:
22+
raise core_exceptions.ProviderError(
2223
message="{} is not a permitted provider domain.".format(
2324
markupsafe.escape(url_netloc)
2425
),
25-
code=400
26+
# TODO: using HTTPStatus.BAD_REQUEST fails tests, not sure why and I will take a another look later
27+
code=HttpBadRequest.code
2628
)
2729
self.url = url
2830
self.provider_metrics = MetricsRecord('provider')
@@ -60,8 +62,8 @@ def serialize(self):
6062
return {
6163
'name': self.name,
6264
'ext': self.ext,
63-
'is_public': self.is_public,
6465
'content_type': self.content_type,
6566
'unique_key': str(self.unique_key),
6667
'download_url': str(self.download_url),
68+
'is_public': self.is_public,
6769
}

mfr/core/utils.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,25 +78,27 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url):
7878
normalized_name = (name and name.lower()) or 'none'
7979
if metadata.is_public:
8080
try:
81+
# Use the public renderer if exist
8182
return driver.DriverManager(
8283
namespace='mfr.public_renderers',
8384
name=normalized_name,
8485
invoke_on_load=True,
8586
invoke_args=(metadata, file_path, url, assets_url, export_url),
8687
).driver
8788
except:
88-
# Check for a public renderer, if one doesn't exist, use a regular one
89-
# Real exceptions handled by main driver.DriverManager
89+
# If public render does not exist, use default renderer by MFR
90+
# If public render exists but exceptions occurs, delay the exception handling
9091
pass
9192

9293
try:
94+
# Use the default MFR handler
9395
return driver.DriverManager(
9496
namespace='mfr.renderers',
9597
name=normalized_name,
9698
invoke_on_load=True,
9799
invoke_args=(metadata, file_path, url, assets_url, export_url),
98100
).driver
99-
except RuntimeError:
101+
except:
100102
raise exceptions.MakeRendererError(
101103
namespace='mfr.renderers',
102104
name=normalized_name,
@@ -110,6 +112,7 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url):
110112
}
111113
)
112114

115+
113116
def sizeof_fmt(num, suffix='B'):
114117
if abs(num) < 1000:
115118
return '%3.0f%s' % (num, suffix)

mfr/extensions/office365/render.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
11
import os
2+
from urllib import parse
3+
24
import furl
5+
from mako.lookup import TemplateLookup
36

47
from mfr.core import extension
5-
from mako.lookup import TemplateLookup
6-
from mfr.extensions.office365 import settings
8+
from mfr.extensions.office365 import settings as office365_settings
79

810

911
class Office365Renderer(extension.BaseRenderer):
10-
"""A renderer for use with public .docx files.
12+
"""A renderer for .docx files that are publicly available.
13+
14+
Office online can render `.docx` files to `.pdf` for us. This renderer will only be made
15+
if a query param with `public_file=true` is present. It then generates and embeds an
16+
office online URL into an `iframe` and returns the template. The file it is trying to
17+
render MUST be public.
1118
12-
Office online can render .docx files to pdf for us.
13-
This renderer will only ever be made if a query param with `public_file=1` is sent.
14-
It then generates and embeds an office online url into an
15-
iframe and returns the template. The file it is trying to render MUST
16-
be available publically online. This renderer will not work if testing locally.
19+
Note: this renderer DOES NOT work locally.
1720
1821
"""
1922

@@ -23,9 +26,10 @@ class Office365Renderer(extension.BaseRenderer):
2326
]).get_template('viewer.mako')
2427

2528
def render(self):
26-
download_url = furl.furl(self.metadata.download_url).set(query='')
27-
url = settings.OFFICE_BASE_URL + download_url.url
28-
return self.TEMPLATE.render(base=self.assets_url, url=url)
29+
download_url = furl.furl(self.metadata.download_url).set(query='').url
30+
encoded_download_url = parse.quote(download_url)
31+
office_render_url = office365_settings.OFFICE_BASE_URL + encoded_download_url
32+
return self.TEMPLATE.render(base=self.assets_url, url=office_render_url)
2933

3034
@property
3135
def file_required(self):

mfr/providers/osf/provider.py

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,21 @@
1-
import os
21
import json
32
import hashlib
3+
from http import HTTPStatus
44
import logging
5-
from urllib.parse import urlparse
65
import mimetypes
6+
import os
7+
from urllib.parse import urlparse
78

89
import furl
910
import aiohttp
1011
from aiohttp.errors import ContentEncodingError
1112

1213
from waterbutler.core import streams
1314

14-
from mfr.core import exceptions
15-
from mfr.core import provider
16-
from mfr.core.utils import sizeof_fmt
17-
from mfr.providers.osf import settings
18-
from mfr.settings import MAX_FILE_SIZE_TO_RENDER
19-
from mfr.core.exceptions import TooBigToRenderError
15+
from mfr import settings as mfr_settings
16+
from mfr.core import exceptions as core_exceptions
17+
from mfr.core import provider, utils
18+
from mfr.providers.osf import settings as provider_settings
2019

2120
logger = logging.getLogger(__name__)
2221

@@ -74,14 +73,14 @@ async def metadata(self):
7473
response_reason = metadata_response.reason
7574
response_headers = metadata_response.headers
7675
await metadata_response.release()
77-
if response_code != 200:
78-
raise exceptions.MetadataError(
76+
if response_code != HTTPStatus.OK:
77+
raise core_exceptions.MetadataError(
7978
'Failed to fetch file metadata from WaterButler. Received response: ',
8079
'code {} {}'.format(str(response_code), str(response_reason)),
8180
metadata_url=download_url,
8281
response=response_reason,
8382
provider=self.NAME,
84-
code=400
83+
code=HTTPStatus.BAD_REQUEST
8584
)
8685

8786
try:
@@ -104,12 +103,12 @@ async def metadata(self):
104103
name, ext = os.path.splitext(metadata['data']['name'])
105104
size = metadata['data']['size']
106105

107-
max_file_size = MAX_FILE_SIZE_TO_RENDER.get(ext)
106+
max_file_size = mfr_settings.MAX_FILE_SIZE_TO_RENDER.get(ext)
108107
if max_file_size and size and int(size) > max_file_size:
109-
raise TooBigToRenderError(
108+
raise core_exceptions.TooBigToRenderError(
110109
"This file with extension '{ext}' exceeds the size limit of {max_size} and will not "
111110
"be rendered. To view this file download it and view it "
112-
"offline.".format(ext=ext, max_size=sizeof_fmt(max_file_size)),
111+
"offline.".format(ext=ext, max_size=utils.sizeof_fmt(max_file_size)),
113112
requested_size=int(size), maximum_size=max_file_size,
114113
)
115114

@@ -121,40 +120,38 @@ async def metadata(self):
121120
unique_key = hashlib.sha256((metadata['data']['etag'] + cleaned_url.url).encode('utf-8')).hexdigest()
122121

123122
is_public = False
124-
125-
if 'public_file' in cleaned_url.args:
126-
if cleaned_url.args['public_file'] not in ['0', '1']:
127-
raise exceptions.QueryParameterError(
128-
'The `public_file` query paramter should either `0`, `1`, or unused. Instead '
129-
'got {}'.format(cleaned_url.args['public_file']),
123+
public_file = cleaned_url.args.get('public_file', None)
124+
if public_file:
125+
if public_file not in ['True', 'False']:
126+
raise core_exceptions.QueryParameterError(
127+
'Invalid value for query parameter `public_file`: {}'.format(cleaned_url.args['public_file']),
130128
url=download_url,
131129
provider=self.NAME,
132-
code=400,
130+
code=HTTPStatus.BAD_REQUEST,
133131
)
134-
135-
is_public = cleaned_url.args['public_file'] == '1'
132+
is_public = public_file == 'True'
136133

137134
return provider.ProviderMetadata(name, ext, content_type,
138135
unique_key, download_url, is_public=is_public)
139136

140137
async def download(self):
141138
"""Download file from WaterButler, returning stream."""
142139
download_url = await self._fetch_download_url()
143-
headers = {settings.MFR_IDENTIFYING_HEADER: '1'}
140+
headers = {provider_settings.MFR_IDENTIFYING_HEADER: '1'}
144141
response = await self._make_request('GET', download_url, allow_redirects=False, headers=headers)
145142

146-
if response.status >= 400:
143+
if response.status >= HTTPStatus.BAD_REQUEST:
147144
resp_text = await response.text()
148145
logger.error('Unable to download file: ({}) {}'.format(response.status, resp_text))
149-
raise exceptions.DownloadError(
146+
raise core_exceptions.DownloadError(
150147
'Unable to download the requested file, please try again later.',
151148
download_url=download_url,
152149
response=resp_text,
153150
provider=self.NAME,
154151
)
155152

156153
self.metrics.add('download.saw_redirect', False)
157-
if response.status in (302, 301):
154+
if response.status in (HTTPStatus.MOVED_PERMANENTLY, HTTPStatus.FOUND):
158155
await response.release()
159156
response = await aiohttp.request('GET', response.headers['location'])
160157
self.metrics.add('download.saw_redirect', True)
@@ -186,8 +183,8 @@ async def _fetch_download_url(self):
186183
)
187184
await request.release()
188185

189-
if request.status != 302:
190-
raise exceptions.MetadataError(
186+
if request.status != HTTPStatus.FOUND:
187+
raise core_exceptions.MetadataError(
191188
request.reason,
192189
metadata_url=self.url,
193190
provider=self.NAME,

tests/extensions/office365/test_renderer.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,23 @@
1+
from urllib import parse
2+
13
import furl
24
import pytest
35

4-
from mfr.extensions.office365 import settings
56
from mfr.core.provider import ProviderMetadata
67
from mfr.extensions.office365 import Office365Renderer
8+
from mfr.extensions.office365 import settings as office365_settings
79

810

911
@pytest.fixture
1012
def metadata():
11-
return ProviderMetadata('test', '.pdf', 'text/plain', '1234',
13+
return ProviderMetadata(
14+
'test',
15+
'.pdf',
16+
'text/plain',
17+
'1234',
1218
'http://wb.osf.io/file/test.pdf?token=1234&public_file=1',
13-
is_public=True)
19+
is_public=True
20+
)
1421

1522

1623
@pytest.fixture
@@ -20,7 +27,7 @@ def file_path():
2027

2128
@pytest.fixture
2229
def url():
23-
return 'http://osf.io/file/test.pdf'
30+
return parse.quote('http://osf.io/file/test.pdf')
2431

2532

2633
@pytest.fixture
@@ -40,8 +47,8 @@ def renderer(metadata, file_path, url, assets_url, export_url):
4047

4148
class TestOffice365Renderer:
4249

43-
def test_render_pdf(self, renderer, metadata, assets_url):
50+
def test_render_pdf(self, renderer, metadata):
4451
download_url = furl.furl(metadata.download_url).set(query='')
45-
body_url = settings.OFFICE_BASE_URL + download_url.url
52+
office_render_url = office365_settings.OFFICE_BASE_URL + parse.quote(download_url.url)
4653
body = renderer.render()
47-
assert '<iframe src={} frameborder=\'0\'></iframe>'.format(body_url) in body
54+
assert '<iframe src={} frameborder=\'0\'></iframe>'.format(office_render_url) in body

tests/server/handlers/test_query_params.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from tests import utils
88

99

10-
class TestRenderHandler(utils.HandlerTestCase):
10+
class TestQueryParamsHandler(utils.HandlerTestCase):
1111

1212
@testing.gen_test
1313
def test_format_url(self):

0 commit comments

Comments
 (0)