From ba1dfaaf9c171568d107a78bf1ce3e3bf2557775 Mon Sep 17 00:00:00 2001 From: Addison Schiller Date: Mon, 2 Oct 2017 12:15:07 -0400 Subject: [PATCH 1/3] `Public_file` query param and office365 renderer Adding support for a `public_file` query param so the OSF can request a public renderer. Added office365 which is a public renderer. This uses office online to do .docx file conversions. --- mfr/core/exceptions.py | 19 ++++++++ mfr/core/provider.py | 4 +- mfr/core/utils.py | 12 +++++ mfr/extensions/office365/README.md | 20 ++++++++ mfr/extensions/office365/__init__.py | 1 + mfr/extensions/office365/render.py | 36 ++++++++++++++ mfr/extensions/office365/settings.py | 6 +++ .../office365/templates/viewer.mako | 11 +++++ mfr/providers/osf/provider.py | 21 ++++++++- setup.py | 4 ++ tests/extensions/office365/__init__.py | 0 tests/extensions/office365/test_renderer.py | 47 +++++++++++++++++++ 12 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 mfr/extensions/office365/README.md create mode 100644 mfr/extensions/office365/__init__.py create mode 100644 mfr/extensions/office365/render.py create mode 100644 mfr/extensions/office365/settings.py create mode 100644 mfr/extensions/office365/templates/viewer.mako create mode 100644 tests/extensions/office365/__init__.py create mode 100644 tests/extensions/office365/test_renderer.py diff --git a/mfr/core/exceptions.py b/mfr/core/exceptions.py index 8f3573b54..9f7d5c8dc 100644 --- a/mfr/core/exceptions.py +++ b/mfr/core/exceptions.py @@ -145,6 +145,25 @@ def __init__(self, message, *args, metadata_url: str='', response: str='', **kwa 'response': self.response }]) + +class QueryParameterError(ProviderError): + """The MFR related errors raised from a :class:`mfr.core.provider` and relating to query parameters + should inherit from MetadataError + This error is thrown when a query parameter is used missused + """ + + __TYPE = 'query_parameter' + + def __init__(self, message, *args, url: str='', code: int=400, 
**kwargs): + super().__init__(message, code=code, *args, **kwargs) + self.url = url + self.return_code = code + self.attr_stack.append([self.__TYPE, { + 'url': self.url, + 'returncode': self.return_code, + }]) + + class TooBigToRenderError(ProviderError): """If the user tries to render a file larger than a server specified maximum, throw a TooBigToRenderError. diff --git a/mfr/core/provider.py b/mfr/core/provider.py index 7931f1d36..00054c313 100644 --- a/mfr/core/provider.py +++ b/mfr/core/provider.py @@ -48,17 +48,19 @@ def download(self): class ProviderMetadata: - def __init__(self, name, ext, content_type, unique_key, download_url): + def __init__(self, name, ext, content_type, unique_key, download_url, is_public=False): self.name = name self.ext = ext self.content_type = content_type self.unique_key = unique_key self.download_url = download_url + self.is_public = is_public def serialize(self): return { 'name': self.name, 'ext': self.ext, + 'is_public': self.is_public, 'content_type': self.content_type, 'unique_key': str(self.unique_key), 'download_url': str(self.download_url), diff --git a/mfr/core/utils.py b/mfr/core/utils.py index b248b11cf..1f77b42a4 100644 --- a/mfr/core/utils.py +++ b/mfr/core/utils.py @@ -76,6 +76,18 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url): :rtype: :class:`mfr.core.extension.BaseRenderer` """ normalized_name = (name and name.lower()) or 'none' + if metadata.is_public: + try: + return driver.DriverManager( + namespace='mfr.public_renderers', + name=normalized_name, + invoke_on_load=True, + invoke_args=(metadata, file_path, url, assets_url, export_url), + ).driver + except: + # Check for a public renderer, if one doesn't exist, use a regular one + pass + try: return driver.DriverManager( namespace='mfr.renderers', diff --git a/mfr/extensions/office365/README.md b/mfr/extensions/office365/README.md new file mode 100644 index 000000000..a43ca9cc4 --- /dev/null +++ b/mfr/extensions/office365/README.md @@ 
-0,0 +1,20 @@ + +# Office 365 Renderer + + +This renderer uses Office Online to render .docx files for us. If the Office Online URL ever changes, it will also need to be changed here in settings. + +Currently there is no OSF-side component for these changes. Once there is, this specific note can be removed. In the meantime, in order to test this renderer, you need to go to your local OSF copy of this file: https://github.com/CenterForOpenScience/osf.io/blob/develop/addons/base/views.py#L728-L736 +and add `'public_file': 1,` to the dict. This will send all files as public files. + +Testing this renderer locally is hard. Since Office Online needs access to the files, it will not work with private files or ones hosted locally. To see what the docx files will render like, replace the render function with something that looks like this: + +``` + def render(self): + static_url = 'https://files.osf.io/v1/resources/<node_id>/providers/osfstorage/<file_id>' + url = settings.OFFICE_BASE_URL + static_url + return self.TEMPLATE.render(base=self.assets_url, url=url) + +``` + +The file at `static_url` must be publicly available. diff --git a/mfr/extensions/office365/__init__.py b/mfr/extensions/office365/__init__.py new file mode 100644 index 000000000..08833dba1 --- /dev/null +++ b/mfr/extensions/office365/__init__.py @@ -0,0 +1 @@ +from .render import Office365Renderer # noqa diff --git a/mfr/extensions/office365/render.py b/mfr/extensions/office365/render.py new file mode 100644 index 000000000..2760ce761 --- /dev/null +++ b/mfr/extensions/office365/render.py @@ -0,0 +1,36 @@ +import os +import furl + +from mfr.core import extension +from mako.lookup import TemplateLookup +from mfr.extensions.office365 import settings + + +class Office365Renderer(extension.BaseRenderer): + """A renderer for use with public .docx files. + + Office online can render .docx files to pdf for us. + This renderer will only ever be made if a query param with `public_file=1` is sent.
+ It then generates and embeds an office online url into an + iframe and returns the template. The file it is trying to render MUST + be available publically online. This renderer will not work if testing locally. + + """ + + TEMPLATE = TemplateLookup( + directories=[ + os.path.join(os.path.dirname(__file__), 'templates') + ]).get_template('viewer.mako') + + def render(self): + download_url = furl.furl(self.metadata.download_url).set(query='') + url = settings.OFFICE_BASE_URL + download_url.url + return self.TEMPLATE.render(base=self.assets_url, url=url) + + @property + def file_required(self): + return False + + @property + def cache_result(self): + return False diff --git a/mfr/extensions/office365/settings.py b/mfr/extensions/office365/settings.py new file mode 100644 index 000000000..c92ba78e4 --- /dev/null +++ b/mfr/extensions/office365/settings.py @@ -0,0 +1,6 @@ +from mfr import settings + + +config = settings.child('OFFICE365_EXTENSION_CONFIG') + +OFFICE_BASE_URL = 'https://view.officeapps.live.com/op/embed.aspx?src=' diff --git a/mfr/extensions/office365/templates/viewer.mako b/mfr/extensions/office365/templates/viewer.mako new file mode 100644 index 000000000..cfc2840dc --- /dev/null +++ b/mfr/extensions/office365/templates/viewer.mako @@ -0,0 +1,11 @@ + + + + + + diff --git a/mfr/providers/osf/provider.py b/mfr/providers/osf/provider.py index 2cae6228f..e958e137b 100644 --- a/mfr/providers/osf/provider.py +++ b/mfr/providers/osf/provider.py @@ -116,7 +116,26 @@ async def metadata(self): cleaned_url.args.pop(unneeded, None) self.metrics.add('metadata.clean_url_args', str(cleaned_url)) unique_key = hashlib.sha256((metadata['data']['etag'] + cleaned_url.url).encode('utf-8')).hexdigest() - return provider.ProviderMetadata(name, ext, content_type, unique_key, download_url) + + if 'public_file' in cleaned_url.args: + if cleaned_url.args['public_file'] not in ['0', '1']: + raise exceptions.QueryParameterError( + 'The `public_file` query paramter should either 
`0`, `1`, or unused. Instead ' + 'got {}'.format(cleaned_url.args['public_file']), + url=download_url, + provider=self.NAME, + code=400, + ) + + if cleaned_url.args['public_file'] == '1': + is_public = True + else: + is_public = False + else: + is_public = False + + return provider.ProviderMetadata(name, ext, content_type, + unique_key, download_url, is_public=is_public) async def download(self): """Download file from WaterButler, returning stream.""" diff --git a/setup.py b/setup.py index d7fe25fff..934e8bd93 100755 --- a/setup.py +++ b/setup.py @@ -40,6 +40,10 @@ def parse_requirements(requirements): 'http = mfr.providers.http:HttpProvider', 'osf = mfr.providers.osf:OsfProvider', ], + 'mfr.public_renderers': [ + '.docx = mfr.extensions.office365:Office365Renderer', + + ], 'mfr.exporters': [ # google docs '.gdraw = mfr.extensions.image:ImageExporter', diff --git a/tests/extensions/office365/__init__.py b/tests/extensions/office365/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/extensions/office365/test_renderer.py b/tests/extensions/office365/test_renderer.py new file mode 100644 index 000000000..ae485a125 --- /dev/null +++ b/tests/extensions/office365/test_renderer.py @@ -0,0 +1,47 @@ +import furl +import pytest + +from mfr.extensions.office365 import settings +from mfr.core.provider import ProviderMetadata +from mfr.extensions.office365 import Office365Renderer + + +@pytest.fixture +def metadata(): + return ProviderMetadata('test', '.pdf', 'text/plain', '1234', + 'http://wb.osf.io/file/test.pdf?token=1234&public_file=1', + is_public=True) + + +@pytest.fixture +def file_path(): + return '/tmp/test.docx' + + +@pytest.fixture +def url(): + return 'http://osf.io/file/test.pdf' + + +@pytest.fixture +def assets_url(): + return 'http://mfr.osf.io/assets' + + +@pytest.fixture +def export_url(): + return 'http://mfr.osf.io/export?url=' + url() + + +@pytest.fixture +def renderer(metadata, file_path, url, assets_url, export_url): + return 
Office365Renderer(metadata, file_path, url, assets_url, export_url) + + +class TestOffice365Renderer: + + def test_render_pdf(self, renderer, metadata, assets_url): + download_url = furl.furl(metadata.download_url).set(query='') + body_url = settings.OFFICE_BASE_URL + download_url.url + body = renderer.render() + assert ''.format(body_url) in body From 8c014eaa95f3ab1a351b881914b394d39b70c2c1 Mon Sep 17 00:00:00 2001 From: Addison Schiller Date: Wed, 8 Nov 2017 10:44:04 -0500 Subject: [PATCH 2/3] Style changes --- mfr/core/utils.py | 4 ++++ mfr/providers/osf/provider.py | 9 +++------ setup.py | 1 - 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mfr/core/utils.py b/mfr/core/utils.py index 1f77b42a4..67cfd16ae 100644 --- a/mfr/core/utils.py +++ b/mfr/core/utils.py @@ -1,7 +1,9 @@ from stevedore import driver from mfr.core import exceptions +import logging +logger = logging.getLogger(__name__) def make_provider(name, request, url): """Returns an instance of :class:`mfr.core.provider.BaseProvider` @@ -76,6 +78,7 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url): :rtype: :class:`mfr.core.extension.BaseRenderer` """ normalized_name = (name and name.lower()) or 'none' + logger.info(str(metadata.is_public)) if metadata.is_public: try: return driver.DriverManager( @@ -86,6 +89,7 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url): ).driver except: # Check for a public renderer, if one doesn't exist, use a regular one + # Real exceptions handled by main driver.DriverManager pass try: diff --git a/mfr/providers/osf/provider.py b/mfr/providers/osf/provider.py index e958e137b..5b677b288 100644 --- a/mfr/providers/osf/provider.py +++ b/mfr/providers/osf/provider.py @@ -117,6 +117,8 @@ async def metadata(self): self.metrics.add('metadata.clean_url_args', str(cleaned_url)) unique_key = hashlib.sha256((metadata['data']['etag'] + cleaned_url.url).encode('utf-8')).hexdigest() + is_public = False + if 'public_file' in 
cleaned_url.args: if cleaned_url.args['public_file'] not in ['0', '1']: raise exceptions.QueryParameterError( @@ -127,12 +129,7 @@ async def metadata(self): code=400, ) - if cleaned_url.args['public_file'] == '1': - is_public = True - else: - is_public = False - else: - is_public = False + is_public = cleaned_url.args['public_file'] == '1' return provider.ProviderMetadata(name, ext, content_type, unique_key, download_url, is_public=is_public) diff --git a/setup.py b/setup.py index 934e8bd93..78ce86ea1 100755 --- a/setup.py +++ b/setup.py @@ -42,7 +42,6 @@ def parse_requirements(requirements): ], 'mfr.public_renderers': [ '.docx = mfr.extensions.office365:Office365Renderer', - ], 'mfr.exporters': [ # google docs From e3c30ff63b8d5c124b4d8b40d877c722047fe099 Mon Sep 17 00:00:00 2001 From: Addison Schiller Date: Wed, 8 Nov 2017 14:20:52 -0500 Subject: [PATCH 3/3] Remove debugging --- mfr/core/utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mfr/core/utils.py b/mfr/core/utils.py index 67cfd16ae..e826483ce 100644 --- a/mfr/core/utils.py +++ b/mfr/core/utils.py @@ -1,9 +1,7 @@ from stevedore import driver from mfr.core import exceptions -import logging -logger = logging.getLogger(__name__) def make_provider(name, request, url): """Returns an instance of :class:`mfr.core.provider.BaseProvider` @@ -78,7 +76,6 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url): :rtype: :class:`mfr.core.extension.BaseRenderer` """ normalized_name = (name and name.lower()) or 'none' - logger.info(str(metadata.is_public)) if metadata.is_public: try: return driver.DriverManager(