Skip to content

Ft/docx rendering, RE: #304 #347

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions mfr/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,27 @@ def __init__(self, message, *args, metadata_url: str='', response: str='', **kwa
'response': self.response
}])


class QueryParameterError(ProviderError):
    """Provider error raised when a query parameter carries an invalid value.

    The offending ``url`` and the HTTP status ``code`` (400 unless overridden) are kept
    on the instance and appended to ``attr_stack`` under the ``query_parameter`` type.
    """

    __TYPE = 'query_parameter'

    def __init__(self, message, *args, url: str='', code: int=400, **kwargs):
        """Build the error and record its url/status details.

        :param message: error message forwarded to the parent class
        :param url: the url whose query parameter was rejected
        :param code: status code forwarded to the parent class and stored as ``return_code``
        """
        super().__init__(message, *args, code=code, **kwargs)
        self.url = url
        self.return_code = code

        # Collect the details first so the appended entry reads as (type, details).
        details = {
            'url': self.url,
            'returncode': self.return_code,
        }
        self.attr_stack.append((self.__TYPE, details))


class TooBigToRenderError(ProviderError):
"""If the user tries to render a file larger than a server specified maximum, throw a
TooBigToRenderError.
Expand Down
39 changes: 24 additions & 15 deletions mfr/core/provider.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,34 @@
import abc
import markupsafe
from abc import (
ABCMeta,
abstractmethod,
abstractproperty
)

import furl
from aiohttp import HttpBadRequest
from furl import furl
import markupsafe

from mfr.core import exceptions
from mfr.server import settings
from mfr.core.exceptions import ProviderError
from mfr.core.metrics import MetricsRecord
from mfr.server.settings import ALLOWED_PROVIDER_NETLOCS


class BaseProvider(metaclass=abc.ABCMeta):
class BaseProvider(metaclass=ABCMeta):
"""Base class for MFR Providers. Requires ``download`` and ``metadata`` methods.
Validates that the given file url is hosted at a domain listed in
`mfr.server.settings.ALLOWED_PROVIDER_DOMAINS`.
"""

def __init__(self, request, url, action=None):
self.request = request
url_netloc = furl.furl(url).netloc
if url_netloc not in settings.ALLOWED_PROVIDER_NETLOCS:
raise exceptions.ProviderError(
netloc = furl(url).netloc
if netloc not in ALLOWED_PROVIDER_NETLOCS:
raise ProviderError(
message="{} is not a permitted provider domain.".format(
markupsafe.escape(url_netloc)
markupsafe.escape(netloc)
),
code=400
# TODO: using HTTPStatus.BAD_REQUEST fails tests; not sure why yet, will take another look later
code=HttpBadRequest.code
)
self.url = url
self.action = action
Expand All @@ -34,28 +40,30 @@ def __init__(self, request, url, action=None):
'url': str(self.url),
})

@abc.abstractproperty
@abstractproperty
def NAME(self):
raise NotImplementedError

@abc.abstractmethod
@abstractmethod
def metadata(self):
pass

@abc.abstractmethod
@abstractmethod
def download(self):
pass


class ProviderMetadata:

def __init__(self, name, ext, content_type, unique_key, download_url, stable_id=None):
def __init__(self, name, ext, content_type, unique_key,
download_url, is_public=False, stable_id=None):
self.name = name
self.ext = ext
self.content_type = content_type
self.unique_key = unique_key
self.download_url = download_url
self.stable_id = stable_id
self.is_public = is_public

def serialize(self):
return {
Expand All @@ -65,4 +73,5 @@ def serialize(self):
'unique_key': str(self.unique_key),
'download_url': str(self.download_url),
'stable_id': None if self.stable_id is None else str(self.stable_id),
'is_public': self.is_public,
}
17 changes: 16 additions & 1 deletion mfr/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,29 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url):
:rtype: :class:`mfr.core.extension.BaseRenderer`
"""
normalized_name = (name and name.lower()) or 'none'
if metadata.is_public:
try:
# Use the public renderer if exist
return driver.DriverManager(
namespace='mfr.public_renderers',
name=normalized_name,
invoke_on_load=True,
invoke_args=(metadata, file_path, url, assets_url, export_url),
).driver
except:
# If no public renderer exists, fall back to MFR's default renderer.
# If a public renderer exists but raises, defer the error handling to the default path below.
pass

try:
# Use the default MFR handler
return driver.DriverManager(
namespace='mfr.renderers',
name=normalized_name,
invoke_on_load=True,
invoke_args=(metadata, file_path, url, assets_url, export_url),
).driver
except RuntimeError:
except:
raise exceptions.MakeRendererError(
namespace='mfr.renderers',
name=normalized_name,
Expand Down
20 changes: 20 additions & 0 deletions mfr/extensions/office365/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

# Office 365 Renderer


This renderer uses Office Online to render .docx files for us. If the Office Online URL ever changes, it will also need to be changed here in settings.

There is currently no OSF-side component for these changes; once there is, this note can be removed. In the meantime, to test this renderer, go to your local OSF copy of this file: https://github.com/CenterForOpenScience/osf.io/blob/develop/addons/base/views.py#L728-L736
and add `'public_file': 1,` to the dict. This will send all files as public files.

Testing this renderer locally is hard. Because Office Online needs access to the files, it will not work with private files or with files hosted locally. To see how a docx file will render, replace the render function with something like this:

```
def render(self):
static_url = 'https://files.osf.io/v1/resources/<fake_project_id>/providers/osfstorage/<fake_file_id>'
url = settings.OFFICE_BASE_URL + static_url
return self.TEMPLATE.render(base=self.assets_url, url=url)

```

The file at `static_url` must be publicly available.
1 change: 1 addition & 0 deletions mfr/extensions/office365/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .render import Office365Renderer # noqa
44 changes: 44 additions & 0 deletions mfr/extensions/office365/render.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
from urllib import parse

from furl import furl
from mako.lookup import TemplateLookup

from mfr.core.extension import BaseRenderer
from mfr.extensions.office365.settings import OFFICE_BASE_URL


class Office365Renderer(BaseRenderer):
    """Renderer that embeds the Office Online viewer for a document.

    Rather than converting the file itself, this renderer hands Office Online the
    file's download url (query string removed) and returns a template that embeds
    the resulting viewer url.

    Note: Office Online has to fetch the file on its own, so the download url must be
    reachable from the public internet. Private files, or files served only from a
    local machine, will not render.
    """

    TEMPLATE = TemplateLookup(
        directories=[os.path.join(os.path.dirname(__file__), 'templates')],
    ).get_template('viewer.mako')

    def render(self):
        """Return the viewer template pointed at the Office Online embed url."""
        # Strip the query string before handing the url to Office Online.
        source = furl(self.metadata.download_url)
        source.set(query='')
        embed_url = OFFICE_BASE_URL + parse.quote(source.url)
        return self.TEMPLATE.render(base=self.assets_url, url=embed_url)

    @property
    def file_required(self):
        """Always ``False``."""
        return False

    @property
    def cache_result(self):
        """Always ``False``."""
        return False
6 changes: 6 additions & 0 deletions mfr/extensions/office365/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from mfr import settings


# Settings subsection for this extension, looked up under the
# 'OFFICE365_EXTENSION_CONFIG' key. NOTE(review): nothing in the visible code
# reads from it yet -- presumably reserved for future overrides; confirm.
config = settings.child('OFFICE365_EXTENSION_CONFIG')

# Prefix of the Office Online embed url; the renderer appends the (quoted)
# file download url directly after ``src=``.
OFFICE_BASE_URL = 'https://view.officeapps.live.com/op/embed.aspx?src='
11 changes: 11 additions & 0 deletions mfr/extensions/office365/templates/viewer.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
## Embeds the Office Online viewer. ``url`` is the full embed url built by the renderer.
## The height carries an explicit unit (a bare number is not a valid CSS length) and the
## ``src`` attribute is quoted so the url is always parsed as a single attribute value.
<style>
    iframe {
        width: 100%;
        height: 800px;
    }
</style>

<iframe src="${url}" frameborder='0'></iframe>

<script src="/static/js/mfr.js"></script>
<script src="/static/js/mfr.child.js"></script>
Loading