diff --git a/desktop/core/src/desktop/middleware.py b/desktop/core/src/desktop/middleware.py index 426677ea971..106b05609ef 100644 --- a/desktop/core/src/desktop/middleware.py +++ b/desktop/core/src/desktop/middleware.py @@ -17,65 +17,76 @@ from __future__ import absolute_import -from builtins import object -import inspect +import re +import sys import json +import time +import socket +import inspect import logging -import mimetypes import os.path -import re -import socket -import sys import tempfile -import time +import mimetypes import traceback +from builtins import object +from urllib.parse import urlparse import kerberos import django.db -import django.views.static import django_prometheus - +import django.views.static from django.conf import settings from django.contrib import messages -from django.contrib.auth import REDIRECT_FIELD_NAME, BACKEND_SESSION_KEY, authenticate, load_backend, login +from django.contrib.auth import BACKEND_SESSION_KEY, REDIRECT_FIELD_NAME, authenticate, load_backend, login from django.contrib.auth.middleware import RemoteUserMiddleware from django.core import exceptions -from django.http import HttpResponseNotAllowed, HttpResponseForbidden +from django.core.files.uploadhandler import MemoryFileUploadHandler, TemporaryFileUploadHandler +from django.http import HttpResponse, HttpResponseForbidden, HttpResponseNotAllowed, HttpResponseRedirect from django.urls import resolve -from django.http import HttpResponseRedirect, HttpResponse from django.utils.deprecation import MiddlewareMixin -from hadoop import cluster -from dashboard.conf import IS_ENABLED as DASHBOARD_ENABLED -from useradmin.models import User - import desktop.views +from dashboard.conf import IS_ENABLED as DASHBOARD_ENABLED from desktop import appmanager, metrics -from desktop.auth.backend import is_admin, find_or_create_user, ensure_has_a_group, rewrite_user -from desktop.conf import AUTH, HTTP_ALLOWED_METHODS, ENABLE_PROMETHEUS, KNOX, DJANGO_DEBUG_MODE, AUDIT_EVENT_LOG_DIR, \ - METRICS, SERVER_USER, REDIRECT_WHITELIST, SECURE_CONTENT_SECURITY_POLICY, has_connectors, is_gunicorn_report_enabled, \ - CUSTOM_CACHE_CONTROL, HUE_LOAD_BALANCER +from desktop.auth.backend import ensure_has_a_group, find_or_create_user, is_admin, knox_login_headers, rewrite_user +from desktop.conf import ( + AUDIT_EVENT_LOG_DIR, + AUTH, + CUSTOM_CACHE_CONTROL, + DJANGO_DEBUG_MODE, + ENABLE_PROMETHEUS, + HTTP_ALLOWED_METHODS, + HUE_LOAD_BALANCER, + KNOX, + METRICS, + REDIRECT_WHITELIST, + SECURE_CONTENT_SECURITY_POLICY, + SERVER_USER, + has_connectors, + is_gunicorn_report_enabled, +) from desktop.context_processors import get_app_name -from desktop.lib import apputil, i18n, fsmanager +from desktop.lib import apputil, fsmanager, i18n from desktop.lib.django_util import JsonResponse, render, render_json from desktop.lib.exceptions import StructuredException from desktop.lib.exceptions_renderable import PopupException from desktop.lib.metrics import global_registry from desktop.lib.view_util import is_ajax from desktop.log import get_audit_logger -from desktop.log.access import access_log, log_page_hit, access_warn -from desktop.auth.backend import knox_login_headers - +from desktop.log.access import access_log, access_warn, log_page_hit +from hadoop import cluster +from hadoop.fs.upload import HDFSfileUploadHandler from libsaml.conf import CDP_LOGOUT_URL -from urllib.parse import urlparse +from useradmin.models import User if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ - from django.utils.http import url_has_allowed_host_and_scheme from urllib.parse import quote + + from django.utils.http import url_has_allowed_host_and_scheme + from django.utils.translation import gettext as _ else: - from django.utils.translation import ugettext as _ from django.utils.http import is_safe_url as url_has_allowed_host_and_scheme, urlquote as quote + from django.utils.translation import ugettext as _ LOG = logging.getLogger() @@ -94,6 +105,7 @@ HUE_LB_HOSTS = [urlparse(hue_lb).netloc for hue_lb in HUE_LOAD_BALANCER.get()] if HUE_LOAD_BALANCER.get() else [] + class AjaxMiddleware(MiddlewareMixin): """ Middleware that augments request to set request.ajax @@ -217,7 +229,8 @@ def process_view(self, request, view_func, view_args, view_kwargs): ret = None for middleware in self._get_middlewares(request._desktop_app, 'view'): ret = middleware(request, view_func, view_args, view_kwargs) - if ret: return ret # Short circuit + if ret: + return ret # Short circuit return ret def process_response(self, request, response): @@ -240,7 +253,8 @@ def process_exception(self, request, exception): ret = None for middleware in self._get_middlewares(request._desktop_app, 'exception'): ret = middleware(request, exception) - if ret: return ret # short circuit + if ret: + return ret # short circuit return ret def _load_app_middleware(cls, app): @@ -256,7 +270,7 @@ def _load_app_middleware(cls, app): dot = middleware_path.rindex('.') except ValueError: raise exceptions.ImproperlyConfigured(_('%(module)s isn\'t a middleware module.') % {'module': middleware_path}) - mw_module, mw_classname = middleware_path[:dot], middleware_path[dot+1:] + mw_module, mw_classname = middleware_path[:dot], middleware_path[dot + 1:] try: mod = __import__(mw_module, {}, {}, ['']) except ImportError as e: @@ -279,7 +293,7 @@ def _load_app_middleware(cls, app): # We need to make sure we don't have a process_request function because we don't know what # application will handle the request at the point process_request is called if hasattr(mw_instance, 'process_request'): - raise exceptions.ImproperlyConfigured(_('AppSpecificMiddleware module "%(module)s" has a process_request function' + \ + raise exceptions.ImproperlyConfigured(_('AppSpecificMiddleware module "%(module)s" has a process_request function' + ' which is impossible.') % {'module': middleware_path}) if hasattr(mw_instance, 'process_view'): result['view'].append(mw_instance.process_view) @@ -316,7 +330,7 @@ def process_view(self, request, view_func, view_args, view_kwargs): return None if AUTH.AUTO_LOGIN_ENABLED.get() and request.path.startswith('/api/v1/token/auth'): - pass # allow /api/token/auth can create user or make it active + pass # allow /api/token/auth can create user or make it active elif request.path.startswith('/api/'): return None @@ -410,7 +424,7 @@ def process_view(self, request, view_func, view_args, view_kwargs): REDIRECT_FIELD_NAME, quote('/hue' + request.get_full_path().replace('is_embeddable=true', '').replace('&&', '&')) ) - }) # Remove embeddable so redirect from & to login works. Login page is not embeddable + }) # Remove embeddable so redirect from & to login works. Login page is not embeddable else: return HttpResponseRedirect("%s?%s=%s" % (settings.LOGIN_URL, REDIRECT_FIELD_NAME, quote(request.get_full_path()))) @@ -557,8 +571,8 @@ def process_response(self, request, response): try: fn = resolve(request.path)[0] fn_name = '%s.%s' % (fn.__module__, fn.__name__) - except: - LOG.exception('failed to resolve url') + except Exception as e: + LOG.exception('Failed to resolve URL: %s', str(e)) fn_name = '' # Write the two versions of html out for offline debugging @@ -598,7 +612,7 @@ class ProxyMiddleware(MiddlewareMixin): def __init__(self, get_response): self.get_response = get_response - if not 'desktop.auth.backend.AllowAllBackend' in AUTH.BACKEND.get(): + if 'desktop.auth.backend.AllowAllBackend' not in AUTH.BACKEND.get(): LOG.info('Unloading ProxyMiddleware') raise exceptions.MiddlewareNotUsed @@ -635,8 +649,8 @@ def process_request(self, request): 'operationText': msg } return - except: - LOG.exception('Unexpected error when authenticating') + except Exception as e: + LOG.exception('Unexpected error when authenticating: %s', str(e)) return def clean_username(self, username, request): @@ -764,8 +778,8 @@ def process_request(self, request): } access_warn(request, msg) return - except: - LOG.exception('Unexpected error when authenticating against KDC') + except Exception as e: + LOG.exception('Unexpected error when authenticating against KDC: %s', str(e)) return else: request.META['Return-401'] = '' @@ -831,7 +845,7 @@ class HueRemoteUserMiddleware(RemoteUserMiddleware): in use. """ def __init__(self, get_response): - if not 'desktop.auth.backend.RemoteUserDjangoBackend' in AUTH.BACKEND.get(): + if 'desktop.auth.backend.RemoteUserDjangoBackend' not in AUTH.BACKEND.get(): LOG.info('Unloading HueRemoteUserMiddleware') raise exceptions.MiddlewareNotUsed super().__init__(get_response) @@ -874,6 +888,7 @@ def process_response(self, request, response): else: return response + class MetricsMiddleware(MiddlewareMixin): """ Middleware to track the number of active requests. @@ -908,7 +923,7 @@ def __init__(self, get_response): raise exceptions.MiddlewareNotUsed def process_response(self, request, response): - if self.secure_content_security_policy and not 'Content-Security-Policy' in response: + if self.secure_content_security_policy and 'Content-Security-Policy' not in response: response["Content-Security-Policy"] = self.secure_content_security_policy return response @@ -933,6 +948,7 @@ def process_response(self, request, response): return response + class MultipleProxyMiddleware: FORWARDED_FOR_FIELDS = [ 'HTTP_X_FORWARDED_FOR', @@ -959,7 +975,7 @@ def __call__(self, request): location += 1 else: request.META['HTTP_X_FORWARDED_FOR'] = item.strip() - break; + break for field in self.FORWARDED_FOR_FIELDS: if field in request.META: @@ -985,4 +1001,37 @@ def process_response(self, request, response): response['Cache-Control'] = 'no-cache, no-store, must-revalidate' response['Pragma'] = 'no-cache' response['Expires'] = '0' - return response \ No newline at end of file + return response + + +class CustomUploadHandlerMiddleware(MiddlewareMixin): + def process_request(self, request): + if request.path.endswith('/desktop/api2/doc/import'): + # Use HDFSfileUploadHandler for document uploads + request.upload_handlers = [ + self.get_hdfs_upload_handler(request), + MemoryFileUploadHandler(), + TemporaryFileUploadHandler(), + ] + else: + # Use the global configuration for other uploads + request.upload_handlers = self.get_upload_handlers(request) + + def get_hdfs_upload_handler(self, request): + return HDFSfileUploadHandler(request) + + def get_upload_handlers(self, request): + handlers = [] + for handler_path in settings.FILE_UPLOAD_HANDLERS: + module_name, class_name = handler_path.rsplit('.', 1) + module = __import__(module_name, fromlist=[class_name]) + handler_class = getattr(module, class_name) + if handler_class.__name__ == 'HDFSfileUploadHandler': + handlers.append(self.get_hdfs_upload_handler(request)) + elif handler_class.__name__ == 'FineUploaderChunkedUploadHandler': + handlers.append(handler_class(request)) + elif handler_class.__name__ in ['MemoryFileUploadHandler', 'TemporaryFileUploadHandler']: + handlers.append(handler_class()) + else: + handlers.append(handler_class()) + return handlers diff --git a/desktop/core/src/desktop/middleware_test.py b/desktop/core/src/desktop/middleware_test.py index 3ec6189269c..ac15871e0cd 100644 --- a/desktop/core/src/desktop/middleware_test.py +++ b/desktop/core/src/desktop/middleware_test.py @@ -15,29 +15,32 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json import os -import pytest import sys +import json import tempfile +import pytest from django.conf import settings -from django.test.client import Client -from django.test import RequestFactory, TestCase -from django.http import HttpResponse +from django.contrib.auth.models import AnonymousUser from django.core import exceptions +from django.core.files.uploadhandler import MemoryFileUploadHandler, TemporaryFileUploadHandler +from django.http import HttpResponse +from django.test import RequestFactory, TestCase +from django.test.client import Client import desktop.conf - -from desktop.middleware import CacheControlMiddleware, MultipleProxyMiddleware from desktop.conf import AUDIT_EVENT_LOG_DIR, CUSTOM_CACHE_CONTROL from desktop.lib.django_test_util import make_logged_in_client from desktop.lib.test_utils import add_permission +from desktop.middleware import CacheControlMiddleware, CustomUploadHandlerMiddleware, MultipleProxyMiddleware +from hadoop.fs.upload import FineUploaderChunkedUploadHandler, HDFSfileUploadHandler if sys.version_info[0] > 2: - from unittest.mock import patch, Mock + from unittest.mock import Mock, patch else: - from mock import patch, Mock + from mock import Mock, patch + @pytest.mark.django_db def test_view_perms(): @@ -60,7 +63,7 @@ def test_view_perms(): response = c.get("/useradmin/users/edit/test") assert 401 == response.status_code - response = c.get("/useradmin/users/edit/user") # Can access his profile page + response = c.get("/useradmin/users/edit/user") # Can access his profile page assert 200 == response.status_code, response.content @@ -92,7 +95,7 @@ def test_audit_logging_middleware_enable(): with tempfile.NamedTemporaryFile("w+t") as log_tmp: log_path = log_tmp.name reset = AUDIT_EVENT_LOG_DIR.set_for_testing(log_path) - settings.MIDDLEWARE.append('desktop.middleware.AuditLoggingMiddleware') # Re-add middleware + settings.MIDDLEWARE.append('desktop.middleware.AuditLoggingMiddleware') # Re-add middleware try: # Check if we audit correctly @@ -110,6 +113,7 @@ def test_audit_logging_middleware_enable(): settings.MIDDLEWARE.pop() reset() + @pytest.mark.django_db def test_audit_logging_middleware_disable(): c = make_logged_in_client(username='test_audit_logging', is_superuser=False) @@ -118,7 +122,7 @@ def test_audit_logging_middleware_disable(): try: # No middleware yet response = c.get("/oozie/") - assert not 'audited' in response, response + assert 'audited' not in response, response finally: reset() @@ -139,7 +143,7 @@ def test_ensure_safe_redirect_middleware(): assert 302 == response.status_code # Disallow most redirects - done.append(desktop.conf.REDIRECT_WHITELIST.set_for_testing('^\d+$')) + done.append(desktop.conf.REDIRECT_WHITELIST.set_for_testing(r'^\d+$')) response = c.post("/hue/accounts/login/", { 'username': 'test', 'password': 'test', @@ -158,7 +162,7 @@ def test_ensure_safe_redirect_middleware(): # Allow all redirects and disallow most at the same time. # should have a logic OR functionality. - done.append(desktop.conf.REDIRECT_WHITELIST.set_for_testing('\d+,.*')) + done.append(desktop.conf.REDIRECT_WHITELIST.set_for_testing(r'\d+,.*')) response = c.post("", { 'username': 'test', 'password': 'test', @@ -170,6 +174,7 @@ def test_ensure_safe_redirect_middleware(): for finish in done: finish() + @pytest.mark.django_db def test_spnego_middleware(): done = [] @@ -209,6 +214,7 @@ def test_spnego_middleware(): finish() settings.AUTHENTICATION_BACKENDS = orig_backends + def test_cache_control_middleware(): c = Client() request = c.get("/") @@ -238,9 +244,11 @@ def dummy_get_response(request): finally: reset() + def get_response(request): return request + @pytest.mark.django_db class TestMultipleProxyMiddleware(TestCase): @@ -267,3 +275,41 @@ def test_multiple_proxy_middleware_without_x_forwarded_for(self): self.middleware(request) assert request.META['HTTP_X_FORWARDED_FOR'] == '192.0.2.0' + +@pytest.mark.django_db +class TestCustomUploadHandlerMiddleware: + + def setup_method(self, method): + self.factory = RequestFactory() + self.middleware = CustomUploadHandlerMiddleware(get_response=lambda request: request) + + def test_process_request_for_doc_import(self): + request = self.factory.get('/desktop/api2/doc/import') + request.user = AnonymousUser() + + self.middleware.process_request(request) + + assert len(request.upload_handlers) == 3 + assert isinstance(request.upload_handlers[0], HDFSfileUploadHandler) + assert isinstance(request.upload_handlers[1], MemoryFileUploadHandler) + assert isinstance(request.upload_handlers[2], TemporaryFileUploadHandler) + + def test_process_request_for_other_paths(self): + request = self.factory.get('/some/other/path') + request.user = AnonymousUser() + + self.middleware.process_request(request) + + assert len(request.upload_handlers) > 0 # Ensure there are handlers + + # Check if the handlers are of the expected types + expected_handler_types = (HDFSfileUploadHandler, FineUploaderChunkedUploadHandler, + MemoryFileUploadHandler, TemporaryFileUploadHandler) + + for handler in request.upload_handlers: + assert isinstance(handler, expected_handler_types), \ + f"Unexpected handler type: {type(handler)}" + + +def get_response(request): + return request diff --git a/desktop/core/src/desktop/settings.py b/desktop/core/src/desktop/settings.py index ca0f93188a4..0008e281b15 100644 --- a/desktop/core/src/desktop/settings.py +++ b/desktop/core/src/desktop/settings.py @@ -162,6 +162,7 @@ 'desktop.middleware.ExceptionMiddleware', 'desktop.middleware.ClusterMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', + 'desktop.middleware.CustomUploadHandlerMiddleware', 'desktop.middleware.CacheControlMiddleware', 'django.middleware.http.ConditionalGetMiddleware', # 'axes.middleware.FailedLoginMiddleware', @@ -682,7 +683,7 @@ def is_oidc_configured(): if is_ofs_enabled(): file_upload_handlers.insert(0, 'desktop.lib.fs.ozone.upload.OFSFileUploadHandler') -FILE_UPLOAD_HANDLERS = tuple(file_upload_handlers) +FILE_UPLOAD_HANDLERS = file_upload_handlers ############################################################