Skip to content

Commit

Permalink
feat: nh3 replace bleach
Browse files Browse the repository at this point in the history
  • Loading branch information
irtazaakram committed Jan 10, 2024
1 parent 2eabfe1 commit 211f13a
Show file tree
Hide file tree
Showing 22 changed files with 57 additions and 47 deletions.
1 change: 1 addition & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ jobs:
if: (github.repository == 'edx/edx-platform-private') || (github.repository == 'openedx/edx-platform' && (startsWith(github.base_ref, 'open-release') == false))
runs-on: [ edx-platform-runner ]
strategy:
fail-fast: false
matrix:
python-version:
- "3.8"
Expand Down
4 changes: 2 additions & 2 deletions lms/djangoapps/certificates/views/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import urllib
from functools import wraps

import bleach
import nh3
from django.db import transaction
from django.db.models import Q
from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseForbidden, HttpResponseServerError
Expand Down Expand Up @@ -89,7 +89,7 @@ def search_certificates(request):
"""
unbleached_filter = urllib.parse.unquote(urllib.parse.quote_plus(request.GET.get("user", "")))
user_filter = bleach.clean(unbleached_filter)
user_filter = nh3.clean(unbleached_filter)
if not user_filter:
msg = _("user is not given.")
return HttpResponseBadRequest(msg)
Expand Down
4 changes: 2 additions & 2 deletions lms/djangoapps/courseware/views/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from datetime import datetime
from urllib.parse import quote_plus, urlencode, urljoin, urlparse, urlunparse

import bleach
import nh3
import requests
from django.conf import settings
from django.contrib.auth.decorators import login_required
Expand Down Expand Up @@ -1538,7 +1538,7 @@ def render_xblock(request, usage_key_string, check_if_enrolled=True, disable_sta
requested_view = request.GET.get('view', 'student_view')
if requested_view != 'student_view' and requested_view != 'public_view': # lint-amnesty, pylint: disable=consider-using-in
return HttpResponseBadRequest(
f"Rendering of the xblock view '{bleach.clean(requested_view, strip=True)}' is not supported."
f"Rendering of the xblock view '{nh3.clean(requested_view, strip_comments=True)}' is not supported."
)

staff_access = has_access(request.user, 'staff', course_key)
Expand Down
2 changes: 1 addition & 1 deletion lms/djangoapps/discussion/django_comment_client/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,7 +1038,7 @@ def sanitize_body(body):
This is possibly overly broad, and might tamper with legitimate posts that
contain this code in fenced code blocks. As far as we can tell, this is an
extra layer of protection, and current handling in the front end and using
bleach for HTML rendering on the server side should cover these cases.
nh3 for HTML rendering on the server side should cover these cases.
"""
if not body:
return body
Expand Down
16 changes: 8 additions & 8 deletions lms/djangoapps/discussion/rest_api/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
Note that this module is designed to imitate the front end behavior as
implemented in Markdown.Sanitizer.js.
"""
import bleach
import nh3
import markdown

ALLOWED_TAGS = bleach.ALLOWED_TAGS | {
ALLOWED_TAGS = nh3.ALLOWED_TAGS | {
'br', 'dd', 'del', 'dl', 'dt', 'h1', 'h2', 'h3', 'h4', 'hr', 'img', 'kbd', 'p', 'pre', 's',
'strike', 'sub', 'sup'
}
ALLOWED_PROTOCOLS = {"http", "https", "ftp", "mailto"}
ALLOWED_ATTRIBUTES = {
"a": ["href", "title", "target", "rel"],
"img": ["src", "alt", "title", "width", "height"],
"a": {"href", "title", "target", "rel"},
"img": {"src", "alt", "title", "width", "height"},
}


Expand All @@ -25,17 +25,17 @@ def render_body(raw_body):
This includes the following steps:
* Convert Markdown to HTML
* Sanitise HTML using bleach
* Sanitise HTML using nh3
Note that this does not prevent Markdown syntax inside a MathJax block from
being processed, which the forums JavaScript code does.
"""
rendered_html = markdown.markdown(raw_body)
sanitised_html = bleach.clean(
sanitised_html = nh3.clean(
rendered_html,
tags=ALLOWED_TAGS,
protocols=ALLOWED_PROTOCOLS,
strip=True,
# protocols=ALLOWED_PROTOCOLS,
strip_comments=True,
attributes=ALLOWED_ATTRIBUTES
)
return sanitised_html
4 changes: 2 additions & 2 deletions lms/templates/courseware/progress_graph.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<%page args="grade_summary, grade_cutoffs, graph_div_id, show_grade_breakdown = True, show_grade_cutoffs = True, **kwargs"/>
<%!
import bleach
import nh3
import json
import math
import six
Expand Down Expand Up @@ -74,7 +74,7 @@ $(function () {
## allowing the display of such images, and remove any previously stored HTML
## to prevent ugly HTML from being shown to learners.
## xss-lint: disable=javascript-jquery-append
ticks.append( [tickIndex, bleach.clean(section['label'], tags=set(), strip=True)] )
ticks.append( [tickIndex, nh3.clean(section['label'], tags=set(), strip_comments=True)] )

if section['category'] in detail_tooltips:
## xss-lint: disable=javascript-jquery-append
Expand Down
2 changes: 1 addition & 1 deletion lms/templates/lti.html
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ <h4 class="hd hd-4 error_message">
% if has_score and comment:
<h4 class="hd hd-4 problem-feedback-label">${_("Feedback on your work from the grader:")}</h4>
<div class="problem-feedback">
## sanitized with bleach in view
## sanitized with nh3 in view
${comment | n, decode.utf8}
</div>
% endif
Expand Down
4 changes: 2 additions & 2 deletions openedx/core/djangoapps/debug/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""


import bleach
import nh3
from django.http import HttpResponseNotFound
from django.template import TemplateDoesNotExist
from django.utils.translation import gettext as _
Expand Down Expand Up @@ -54,4 +54,4 @@ def show_reference_template(request, template):

return render_to_response(template, context)
except TemplateDoesNotExist:
return HttpResponseNotFound(f'Missing template {bleach.clean(template, strip=True)}')
return HttpResponseNotFound(f'Missing template {nh3.clean(template, strip_comments=True)}')
4 changes: 2 additions & 2 deletions openedx/core/djangoapps/user_authn/views/logout.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import urllib.parse as parse # pylint: disable=import-error
from urllib.parse import parse_qs, urlsplit, urlunsplit # pylint: disable=import-error

import bleach
import nh3
from django.conf import settings
from django.contrib.auth import logout
from django.shortcuts import redirect
Expand Down Expand Up @@ -60,7 +60,7 @@ def target(self):
# >> /courses/course-v1:ARTS+D1+2018_T/course/
# to handle this scenario we need to encode our URL using quote_plus and then unquote it again.
if target_url:
target_url = bleach.clean(parse.unquote(parse.quote_plus(target_url)))
target_url = nh3.clean(parse.unquote(parse.quote_plus(target_url)))

use_target_url = target_url and is_safe_login_or_logout_redirect(
redirect_to=target_url,
Expand Down
4 changes: 2 additions & 2 deletions openedx/core/djangoapps/user_authn/views/tests/test_logout.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import urllib
from unittest import mock
import ddt
import bleach
import nh3
from django.conf import settings
from django.test import TestCase
from django.test.utils import override_settings
Expand Down Expand Up @@ -237,6 +237,6 @@ def test_logout_redirect_failure_with_xss_vulnerability(self, redirect_url, host
)
response = self.client.get(url, HTTP_HOST=host)
expected = {
'target': bleach.clean(urllib.parse.unquote(redirect_url)),
'target': nh3.clean(urllib.parse.unquote(redirect_url)),
}
self.assertDictContainsSubset(expected, response.context_data)
4 changes: 2 additions & 2 deletions openedx/core/djangolib/markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


import markupsafe
import bleach
import nh3
from lxml.html.clean import Cleaner
from mako.filters import decode

Expand Down Expand Up @@ -53,7 +53,7 @@ def strip_all_tags_but_br(string_to_strip):
string_to_strip = ""

string_to_strip = decode.utf8(string_to_strip)
string_to_strip = bleach.clean(string_to_strip, tags={'br'}, strip=True)
string_to_strip = nh3.clean(string_to_strip, tags={'br'}, strip_comments=True)

return HTML(string_to_strip)

Expand Down
2 changes: 2 additions & 0 deletions requirements/edx/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,8 @@ newrelic==9.3.0
# via
# -r requirements/edx/bundled.in
# edx-django-utils
nh3==0.2.15
# via -r requirements/edx/kernel.in
nltk==3.8.1
# via chem
nodeenv==1.8.0
Expand Down
4 changes: 4 additions & 0 deletions requirements/edx/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,10 @@ newrelic==9.3.0
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
# edx-django-utils
nh3==0.2.15
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
nltk==3.8.1
# via
# -r requirements/edx/doc.txt
Expand Down
2 changes: 2 additions & 0 deletions requirements/edx/doc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,8 @@ newrelic==9.3.0
# via
# -r requirements/edx/base.txt
# edx-django-utils
nh3==0.2.15
# via -r requirements/edx/base.txt
nltk==3.8.1
# via
# -r requirements/edx/base.txt
Expand Down
1 change: 1 addition & 0 deletions requirements/edx/kernel.in
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ mako # Primary template language used for server-
Markdown # Convert text markup to HTML; used in capa problems, forums, and course wikis
mongoengine # Object-document mapper for MongoDB, used in the LMS dashboard
mysqlclient # Driver for the default production relational database
nh3
nodeenv # Utility for managing Node.js environments; we use this for deployments and testing
oauthlib # OAuth specification support for authenticating via LTI or other Open edX services
olxcleaner
Expand Down
2 changes: 2 additions & 0 deletions requirements/edx/testing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,8 @@ newrelic==9.3.0
# via
# -r requirements/edx/base.txt
# edx-django-utils
nh3==0.2.15
# via -r requirements/edx/base.txt
nltk==3.8.1
# via
# -r requirements/edx/base.txt
Expand Down
4 changes: 2 additions & 2 deletions xmodule/capa/inputtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
import time
from datetime import datetime

import bleach
import nh3
import html5lib
import pyparsing
import six
Expand Down Expand Up @@ -800,7 +800,7 @@ def setup_code_response_rendering(self):
if self.status == 'incomplete':
self.status = 'queued'
self.queue_len = self.msg # lint-amnesty, pylint: disable=attribute-defined-outside-init
self.msg = bleach.clean(self.submitted_msg)
self.msg = nh3.clean(self.submitted_msg)

def setup(self):
""" setup this input type """
Expand Down
4 changes: 2 additions & 2 deletions xmodule/capa/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test_compare_with_tolerance(self): # lint-amnesty, pylint: disable=too-many

def test_sanitize_html(self):
"""
Test for html sanitization with bleach.
Test for html sanitization with nh3.
"""
allowed_tags = ['div', 'p', 'audio', 'pre', 'span']
for tag in allowed_tags:
Expand All @@ -142,7 +142,7 @@ def test_get_inner_html_from_xpath(self):

def test_remove_markup(self):
"""
Test for markup removal with bleach.
Test for markup removal with nh3.
"""
assert remove_markup('The <mark>Truth</mark> is <em>Out There</em> & you need to <strong>find</strong> it') ==\
'The Truth is Out There &amp; you need to find it'
Expand Down
24 changes: 11 additions & 13 deletions xmodule/capa/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
from cmath import isinf, isnan
from decimal import Decimal

import bleach
import nh3
from calc import evaluator
from lxml import etree

from bleach.css_sanitizer import CSSSanitizer
from openedx.core.djangolib.markup import HTML

#-----------------------------------------------------------------------------
Expand Down Expand Up @@ -182,17 +181,16 @@ def sanitize_html(html_code):
Used to sanitize XQueue responses from Matlab.
"""
attributes = bleach.ALLOWED_ATTRIBUTES.copy()
attributes = nh3.ALLOWED_ATTRIBUTES.copy()
attributes.update({
'*': ['class', 'style', 'id'],
'audio': ['controls', 'autobuffer', 'autoplay', 'src'],
'img': ['src', 'width', 'height', 'class']
'*': {'class', 'style', 'id'},
'audio': {'controls', 'autobuffer', 'autoplay', 'src'},
'img': {'src', 'width', 'height', 'class'}
})
output = bleach.clean(
output = nh3.clean(
html_code,
protocols=bleach.ALLOWED_PROTOCOLS | {'data'},
tags=bleach.ALLOWED_TAGS | {'div', 'p', 'audio', 'pre', 'img', 'span'},
css_sanitizer=CSSSanitizer(allowed_css_properties=["white-space"]),
# protocols=nh3.ALLOWED_PROTOCOLS | {'data'},
tags=nh3.ALLOWED_TAGS | {'div', 'p', 'audio', 'pre', 'img', 'span'},
attributes=attributes
)
return output
Expand All @@ -215,12 +213,12 @@ def remove_markup(html):
"""
Return html with markup stripped and text HTML-escaped.
>>> bleach.clean("<b>Rock & Roll</b>", tags=set(), strip=True)
>>> nh3.clean("<b>Rock & Roll</b>", tags=set(), strip_comments=True)
'Rock &amp; Roll'
>>> bleach.clean("<b>Rock &amp; Roll</b>", tags=set(), strip=True)
>>> nh3.clean("<b>Rock &amp; Roll</b>", tags=set(), strip_comments=True)
'Rock &amp; Roll'
"""
return HTML(bleach.clean(html, tags=set(), strip=True))
return HTML(nh3.clean(html, tags=set(), strip_comments=True))


def get_course_id_from_capa_block(capa_block):
Expand Down
4 changes: 2 additions & 2 deletions xmodule/capa_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import sys
import traceback

from bleach.sanitizer import Cleaner
import nh3
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.utils.encoding import smart_str
Expand Down Expand Up @@ -557,7 +557,7 @@ def index_dictionary(self):
capa_content = re.sub(
r"(\s|&nbsp;|//)+",
" ",
Cleaner(tags=[], strip=True).clean(capa_content)
nh3.clean(capa_content, tags=set())
)

capa_body = {
Expand Down
4 changes: 2 additions & 2 deletions xmodule/library_content_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from copy import copy
from gettext import ngettext, gettext

import bleach
import nh3
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist, PermissionDenied
from django.utils.functional import classproperty
Expand Down Expand Up @@ -714,7 +714,7 @@ def source_library_values(self):
lib_tools = self.get_tools()
user_perms = self.runtime.service(self, 'studio_user_permissions')
all_libraries = [
(key, bleach.clean(name)) for key, name in lib_tools.list_available_libraries()
(key, nh3.clean(name)) for key, name in lib_tools.list_available_libraries()
if user_perms.can_read(key) or self.source_library_id == str(key)
]
all_libraries.sort(key=lambda entry: entry[1]) # Sort by name
Expand Down
4 changes: 2 additions & 2 deletions xmodule/lti_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
from unittest import mock
from urllib import parse

import bleach
import nh3
import oauthlib.oauth1
from django.conf import settings
from lxml import etree
Expand Down Expand Up @@ -468,7 +468,7 @@ def get_context(self):
# 'acronym': ['title'],
#
# This lets all plaintext through.
sanitized_comment = bleach.clean(self.score_comment)
sanitized_comment = nh3.clean(self.score_comment)

return {
'input_fields': self.get_input_fields(),
Expand Down

0 comments on commit 211f13a

Please sign in to comment.