diff --git a/events/forms.py b/events/forms.py index 8455888..714bb11 100644 --- a/events/forms.py +++ b/events/forms.py @@ -4,9 +4,7 @@ from django.conf import settings from django_summernote.widgets import SummernoteInplaceWidget from django.utils.translation import gettext_lazy as _ - -from sanitizer.forms import SanitizedCharField - +from .html_sanitizer import sanitize_html from .models import Event, EventParticipation from .mixins import CrispyFormMixin, ReadOnlyFieldsMixin @@ -21,11 +19,11 @@ class EventForm(CrispyFormMixin): - description = SanitizedCharField( - allowed_tags=settings.ALLOWED_HTML_TAGS_INPUT, - allowed_attributes=settings.ALLOWED_HTML_ATTRIBUTES_INPUT, - allowed_styles=settings.ALLOWED_HTML_STYLES_INPUT, - strip=False, widget=SummernoteInplaceWidget()) + description = forms.CharField( + widget=SummernoteInplaceWidget(), + required=False, + label=_("Descripción") + ) start_at = forms.SplitDateTimeField( required=True, @@ -72,15 +70,30 @@ class Meta: 'has_sponsors': HAS_SPONSORS_HELP_TEXT, } + def clean_description(self): + """ + Clean and sanitize the 'description' field using the allowed tags/attrs/styles + from Django settings. + """ + raw_description = self.cleaned_data.get('description', '') + safe_html = sanitize_html( + html=raw_description, + allowed_tags=settings.ALLOWED_HTML_TAGS_INPUT, + allowed_attrs=settings.ALLOWED_HTML_ATTRIBUTES_INPUT, + allowed_styles=settings.ALLOWED_HTML_STYLES_INPUT + ) + return safe_html + def clean(self): cleaned_data = super().clean() start_at = cleaned_data.get('start_at') end_at = cleaned_data.get('end_at') - if start_at is not None and end_at is not None: - if start_at > end_at: - msg = 'La fecha de inicio es menor a la fecha de finalizacion' - self._errors['start_at'] = [_(msg)] - self._errors['end_at'] = [_(msg)] + + if start_at and end_at and start_at > end_at: + msg = 'La fecha de inicio es menor a la fecha de finalizacion' + self._errors['start_at'] = [_(msg)] + self._errors['end_at'] = [_(msg)] + return cleaned_data def save(self, *args, **kwargs): diff --git a/events/html_sanitizer.py b/events/html_sanitizer.py new file mode 100644 index 0000000..be7ba36 --- /dev/null +++ b/events/html_sanitizer.py @@ -0,0 +1,107 @@ +from bs4 import BeautifulSoup + +from urllib.parse import urlparse + +from django.conf import settings + + +def is_safe_url(url: str, attr: str) -> bool: + """ + Check if the URL uses an allowed scheme for the given attribute. + + Args: + url (str): The URL to validate. + attr (str): The attribute name (e.g., 'href', 'src'). + + Returns: + bool: True if the URL is safe, False otherwise. + """ + parsed = urlparse(url) + if parsed.scheme == "": + # URLs without a scheme are considered safe (relative URLs) + return True + allowed_schemes = settings.ALLOWED_URL_SCHEMES.get(attr, []) + return parsed.scheme.lower() in allowed_schemes + + +def remove_disallowed_tags(soup: BeautifulSoup, allowed_tags: list[str]) -> None: + """ + Remove any tags that are not present in allowed_tags. + """ + for tag in soup.find_all(): + if tag.name not in allowed_tags: + tag.decompose() + + +def filter_style_attribute(style_value: str, allowed_styles: list[str]) -> str: + """ + Take a CSS style string (e.g. "color: red; font-weight: bold;") and + return a sanitized version containing only the allowed properties. + Returns an empty string if no allowed properties remain. + """ + filtered_style_pairs = [] + for prop_pair in style_value.split(";"): + prop_pair = prop_pair.strip() + if not prop_pair: + continue + if ":" not in prop_pair: + continue + + prop_name, prop_value = prop_pair.split(":", 1) + prop_name = prop_name.strip().lower() + prop_value = prop_value.strip() + + if prop_name in allowed_styles: + filtered_style_pairs.append(f"{prop_name}: {prop_value}") + + return "; ".join(filtered_style_pairs) + + +def filter_attributes( + soup: BeautifulSoup, allowed_attrs: list[str], allowed_styles: list[str] +) -> None: + """ + For each remaining (allowed) tag in the soup, remove any attribute + not in allowed_attrs. If the attribute is 'style', filter out + disallowed CSS properties. + """ + for tag in soup.find_all(): + for attr_name in list(tag.attrs): + # If attribute name is not allowed, remove it + if attr_name not in allowed_attrs: + del tag.attrs[attr_name] + # If it's a style attribute, apply style filtering + elif attr_name == "style": + style_value = tag.attrs["style"] + safe_style = filter_style_attribute(style_value, allowed_styles) + if safe_style: + tag.attrs["style"] = safe_style + else: + # If no allowed properties remain, remove the style attribute + del tag.attrs["style"] + elif attr_name in settings.ALLOWED_URL_SCHEMES: + # Validate URL schemes for attributes like 'href' and 'src' + url = tag.attrs[attr_name] + if not is_safe_url(url, attr_name): + del tag.attrs[attr_name] + + +def sanitize_html( + html: str, + allowed_tags: list[str], + allowed_attrs: list[str], + allowed_styles: list[str], +) -> str: + """ + Main function that orchestrates the sanitization process. + """ + soup = BeautifulSoup(html, "html.parser") + + # 1. Remove disallowed tags entirely + remove_disallowed_tags(soup, allowed_tags) + + # 2. Filter attributes (including style) on the remaining tags + filter_attributes(soup, allowed_attrs, allowed_styles) + + # Return the resulting sanitized HTML + return str(soup) diff --git a/events/tests/test_html_sanitizer.py b/events/tests/test_html_sanitizer.py new file mode 100644 index 0000000..857c3ae --- /dev/null +++ b/events/tests/test_html_sanitizer.py @@ -0,0 +1,182 @@ +from django.test import TestCase, override_settings +from events.html_sanitizer import sanitize_html +from django.conf import settings + + +class SanitizeHTMLTests(TestCase): + def setUp(self): + # Define allowed tags, attributes, and styles from settings + self.allowed_tags = settings.ALLOWED_HTML_TAGS_INPUT + self.allowed_attrs = settings.ALLOWED_HTML_ATTRIBUTES_INPUT + self.allowed_styles = settings.ALLOWED_HTML_STYLES_INPUT + + def test_allowed_tags_preserved(self): + """Ensure that allowed tags are preserved in the output.""" + input_html = "
This is a test paragraph.
" + expected_output = "This is a test paragraph.
" + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_disallowed_tags_removed(self): + """Ensure that disallowed tags are removed from the output.""" + input_html = "This is a test.
" + expected_output = "This is a test.
" + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_allowed_attributes_preserved(self): + """Ensure that allowed attributes are preserved.""" + input_html = 'Link' + expected_output = 'Link' + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_disallowed_attributes_removed(self): + """Ensure that disallowed attributes are removed.""" + input_html = 'Link' + expected_output = 'Link' + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_allowed_styles_preserved(self): + """Ensure that allowed CSS styles are preserved.""" + input_html = 'Styled text
' + expected_output = 'Styled text
' + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_malformed_html(self): + """Ensure that malformed HTML is handled gracefully.""" + input_html = "This is bold and italic
" + expected_output = "This is bold and italic
" + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_escape_entities(self): + """Ensure that HTML entities are preserved.""" + input_html = "5 < 10 & 10 > 5" + expected_output = "5 < 10 & 10 > 5" # If using convert_charrefs=False + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_href_sanitization(self): + """Ensure that href attributes do not contain 'javascript:'.""" + input_html = "Bad Link" + expected_output = "Bad Link" # 'href' removed due to unsafe scheme + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_style_with_malicious_content(self): + """Ensure that style attributes do not contain malicious content.""" + input_html = ( + "Test
" + ) + expected_output = 'Test
' + + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_completely_malicious_input(self): + """Ensure that completely malicious input is sanitized appropriately.""" + input_html = 'This is a strong and emphasized text.
+ + Example Link +This is a strong and emphasized text.
+ + Example Link + +No styles
' + expected_output = "No styles
" + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + def test_preserve_text_only(self): + """Ensure that plain text without HTML remains unchanged.""" + input_html = "Just plain text without HTML." + expected_output = "Just plain text without HTML." + sanitized = sanitize_html( + input_html, self.allowed_tags, self.allowed_attrs, self.allowed_styles + ) + self.assertEqual(sanitized, expected_output) + + @override_settings( + ALLOWED_HTML_TAGS_INPUT=["p", "a"], + ALLOWED_HTML_ATTRIBUTES_INPUT=["href"], + ALLOWED_HTML_STYLES_INPUT=[], + ) + def test_dynamic_allowed_tags(self): + """Ensure that sanitizer uses dynamically overridden settings.""" + input_html = ( + "Paragraph with " + 'link ' + "and bold.
" + ) + # With settings overridden, 'a' allows 'href' only, 'b' is disallowed + expected = 'Paragraph with link and .
' + sanitized = sanitize_html( + html=input_html, + allowed_tags=settings.ALLOWED_HTML_TAGS_INPUT, + allowed_attrs=settings.ALLOWED_HTML_ATTRIBUTES_INPUT, + allowed_styles=settings.ALLOWED_HTML_STYLES_INPUT, + ) + self.assertEqual(sanitized, expected) diff --git a/events/tests/test_views.py b/events/tests/test_views.py index 62100b9..1c53f22 100644 --- a/events/tests/test_views.py +++ b/events/tests/test_views.py @@ -1,5 +1,3 @@ -import bleach - from django.test import TestCase, Client from django.urls import reverse @@ -89,7 +87,7 @@ def test_html_sanitizer_in_description_field(self): self.assertEqual(response.status_code, 302) self.assertEqual(Event.objects.filter(name='PyDay San Rafael').count(), 1) event = Event.objects.filter(name='PyDay San Rafael').get() - self.assertEqual(event.description, bleach.clean('an example')) + self.assertEqual(event.description, ('an example')) def test_events_view_delete(self): event = EventFactory(owner=self.user) diff --git a/joboffers/forms.py b/joboffers/forms.py index 93b9aab..a21e425 100644 --- a/joboffers/forms.py +++ b/joboffers/forms.py @@ -7,18 +7,17 @@ from crispy_forms.layout import Submit, Reset, Layout from crispy_forms.helper import FormHelper from django_summernote.widgets import SummernoteInplaceWidget -from sanitizer.forms import SanitizedCharField - +from events.html_sanitizer import sanitize_html from . import utils class JobOfferForm(forms.ModelForm): """A PyAr Jobs form.""" - description = SanitizedCharField( - allowed_tags=settings.ALLOWED_HTML_TAGS_INPUT, - allowed_attributes=settings.ALLOWED_HTML_ATTRIBUTES_INPUT, - strip=False, widget=SummernoteInplaceWidget(), label='Descripción', + description = forms.CharField( + widget=SummernoteInplaceWidget(), + label='Descripción', + required=True ) def __init__(self, *args, **kwargs): @@ -49,6 +48,20 @@ def __init__(self, *args, **kwargs): Reset('reset', _('Limpiar'), css_class='btn-default') ) + def clean_description(self): + """ + Clean and sanitize the 'description' field using the allowed tags/attrs/styles + from Django settings (similar to your EventForm). + """ + raw_description = self.cleaned_data.get('description', '') + safe_html = sanitize_html( + html=raw_description, + allowed_tags=settings.ALLOWED_HTML_TAGS_INPUT, + allowed_attrs=settings.ALLOWED_HTML_ATTRIBUTES_INPUT, + allowed_styles=settings.ALLOWED_HTML_STYLES_INPUT + ) + return safe_html + def clean_tags(self): """ Normalizes repeated tags and special characters diff --git a/pyarweb/settings/base.py b/pyarweb/settings/base.py index ce437b6..516601d 100644 --- a/pyarweb/settings/base.py +++ b/pyarweb/settings/base.py @@ -71,7 +71,6 @@ 'email_obfuscator', 'dbbackup', 'captcha', - 'sanitizer', 'easyaudit', ) @@ -224,6 +223,10 @@ 'ul', 'ol', 'div', + 'strong', + 'em', + 'span', + 'i', ] ALLOWED_HTML_ATTRIBUTES_INPUT = [ 'href', @@ -232,13 +235,20 @@ 'width', 'class', 'face', + 'title', ] ALLOWED_HTML_STYLES_INPUT = [ 'text-align', 'margin-left', 'background-color', 'font-size', + 'font-weight', + 'color', ] +ALLOWED_URL_SCHEMES = { + 'href': ['http', 'https', 'mailto', 'tel'], + 'src': ['http', 'https', 'data'], +} TAGGIT_CASE_INSENSITIVE = True GOOGLE_TRACKING_ID = os.environ.get('GOOGLE_TRACKING_ID', '') diff --git a/pycompanies/views.py b/pycompanies/views.py index af26941..ec85ab9 100644 --- a/pycompanies/views.py +++ b/pycompanies/views.py @@ -1,7 +1,6 @@ -from braces.views import LoginRequiredMixin - from django.contrib import messages from django.contrib.auth import get_user_model +from django.contrib.auth.mixins import LoginRequiredMixin from django.core.exceptions import PermissionDenied from django.db.models import Count from django.shortcuts import redirect, render diff --git a/requirements.txt b/requirements.txt index c08f12a..7e81336 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,12 @@ -bleach==4.1.0 # Pinning bleach until this issue is resolved: https://github.com/marksweb/django-bleach/issues/51 Django==3.2.2 django-allauth==0.50.0 django-autoslug==1.9.8 django-bootstrap3==21.1 -django-braces==1.15.0 django-crispy-forms==1.13.0 django-dbbackup==3.3.0 django-easy-audit==1.3.2 django-email-obfuscator==0.1.5 django-extensions==3.1.5 -django-html-sanitizer==0.1.5 django-model-utils==4.2.0 django-sendfile==0.3.11 django-simple-captcha==0.5.18