diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index c3b1086c20eb..aa59f1a69d74 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -12,6 +12,7 @@ jobs:
if: (github.repository == 'edx/edx-platform-private') || (github.repository == 'openedx/edx-platform' && (startsWith(github.base_ref, 'open-release') == false))
runs-on: [ edx-platform-runner ]
strategy:
+ fail-fast: false
matrix:
python-version:
- "3.8"
diff --git a/lms/djangoapps/certificates/views/support.py b/lms/djangoapps/certificates/views/support.py
index 7aadf3091952..12c3c8157eda 100644
--- a/lms/djangoapps/certificates/views/support.py
+++ b/lms/djangoapps/certificates/views/support.py
@@ -10,7 +10,7 @@
import urllib
from functools import wraps
-import bleach
+import nh3
from django.db import transaction
from django.db.models import Q
from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseForbidden, HttpResponseServerError
@@ -89,7 +89,7 @@ def search_certificates(request):
"""
unbleached_filter = urllib.parse.unquote(urllib.parse.quote_plus(request.GET.get("user", "")))
- user_filter = bleach.clean(unbleached_filter)
+ user_filter = nh3.clean(unbleached_filter)
if not user_filter:
msg = _("user is not given.")
return HttpResponseBadRequest(msg)
diff --git a/lms/djangoapps/courseware/views/views.py b/lms/djangoapps/courseware/views/views.py
index d0e657775bce..cff0136142d4 100644
--- a/lms/djangoapps/courseware/views/views.py
+++ b/lms/djangoapps/courseware/views/views.py
@@ -10,7 +10,7 @@
from datetime import datetime
from urllib.parse import quote_plus, urlencode, urljoin, urlparse, urlunparse
-import bleach
+import nh3
import requests
from django.conf import settings
from django.contrib.auth.decorators import login_required
@@ -1538,7 +1538,7 @@ def render_xblock(request, usage_key_string, check_if_enrolled=True, disable_sta
requested_view = request.GET.get('view', 'student_view')
if requested_view != 'student_view' and requested_view != 'public_view': # lint-amnesty, pylint: disable=consider-using-in
return HttpResponseBadRequest(
- f"Rendering of the xblock view '{bleach.clean(requested_view, strip=True)}' is not supported."
+ f"Rendering of the xblock view '{nh3.clean(requested_view, strip_comments=True)}' is not supported."
)
staff_access = has_access(request.user, 'staff', course_key)
diff --git a/lms/djangoapps/discussion/django_comment_client/utils.py b/lms/djangoapps/discussion/django_comment_client/utils.py
index 3b5e1af4a039..e26b748270e3 100644
--- a/lms/djangoapps/discussion/django_comment_client/utils.py
+++ b/lms/djangoapps/discussion/django_comment_client/utils.py
@@ -1038,7 +1038,7 @@ def sanitize_body(body):
This is possibly overly broad, and might tamper with legitimate posts that
contain this code in fenced code blocks. As far as we can tell, this is an
extra layer of protection, and current handling in the front end and using
- bleach for HTML rendering on the server side should cover these cases.
+ nh3 for HTML rendering on the server side should cover these cases.
"""
if not body:
return body
diff --git a/lms/djangoapps/discussion/rest_api/render.py b/lms/djangoapps/discussion/rest_api/render.py
index 44e2d8b692ce..e998a6bded87 100644
--- a/lms/djangoapps/discussion/rest_api/render.py
+++ b/lms/djangoapps/discussion/rest_api/render.py
@@ -4,17 +4,17 @@
Note that this module is designed to imitate the front end behavior as
implemented in Markdown.Sanitizer.js.
"""
-import bleach
+import nh3
import markdown
-ALLOWED_TAGS = bleach.ALLOWED_TAGS | {
+ALLOWED_TAGS = nh3.ALLOWED_TAGS | {
'br', 'dd', 'del', 'dl', 'dt', 'h1', 'h2', 'h3', 'h4', 'hr', 'img', 'kbd', 'p', 'pre', 's',
'strike', 'sub', 'sup'
}
ALLOWED_PROTOCOLS = {"http", "https", "ftp", "mailto"}
ALLOWED_ATTRIBUTES = {
- "a": ["href", "title", "target", "rel"],
- "img": ["src", "alt", "title", "width", "height"],
+ "a": {"href", "title", "target", "rel"},
+ "img": {"src", "alt", "title", "width", "height"},
}
@@ -25,17 +25,17 @@ def render_body(raw_body):
This includes the following steps:
* Convert Markdown to HTML
- * Sanitise HTML using bleach
+ * Sanitise HTML using nh3
Note that this does not prevent Markdown syntax inside a MathJax block from
being processed, which the forums JavaScript code does.
"""
rendered_html = markdown.markdown(raw_body)
- sanitised_html = bleach.clean(
+ sanitised_html = nh3.clean(
rendered_html,
tags=ALLOWED_TAGS,
- protocols=ALLOWED_PROTOCOLS,
- strip=True,
+ # protocols=ALLOWED_PROTOCOLS,
+ strip_comments=True,
attributes=ALLOWED_ATTRIBUTES
)
return sanitised_html
diff --git a/lms/templates/courseware/progress_graph.js b/lms/templates/courseware/progress_graph.js
index 5a1d64c36ee6..c3117b6e92ba 100644
--- a/lms/templates/courseware/progress_graph.js
+++ b/lms/templates/courseware/progress_graph.js
@@ -1,6 +1,6 @@
<%page args="grade_summary, grade_cutoffs, graph_div_id, show_grade_breakdown = True, show_grade_cutoffs = True, **kwargs"/>
<%!
- import bleach
+ import nh3
import json
import math
import six
@@ -74,7 +74,7 @@ $(function () {
## allowing the display of such images, and remove any previously stored HTML
## to prevent ugly HTML from being shown to learners.
## xss-lint: disable=javascript-jquery-append
- ticks.append( [tickIndex, bleach.clean(section['label'], tags=set(), strip=True)] )
+ ticks.append( [tickIndex, nh3.clean(section['label'], tags=set(), strip_comments=True)] )
if section['category'] in detail_tooltips:
## xss-lint: disable=javascript-jquery-append
diff --git a/lms/templates/lti.html b/lms/templates/lti.html
index 2f89182597b1..05346ec3dc40 100644
--- a/lms/templates/lti.html
+++ b/lms/templates/lti.html
@@ -61,7 +61,7 @@
% if has_score and comment:
${_("Feedback on your work from the grader:")}
- ## sanitized with bleach in view
+ ## sanitized with nh3 in view
${comment | n, decode.utf8}
% endif
diff --git a/openedx/core/djangoapps/debug/views.py b/openedx/core/djangoapps/debug/views.py
index 6a3bcc40f280..9b64763015e3 100644
--- a/openedx/core/djangoapps/debug/views.py
+++ b/openedx/core/djangoapps/debug/views.py
@@ -5,7 +5,7 @@
"""
-import bleach
+import nh3
from django.http import HttpResponseNotFound
from django.template import TemplateDoesNotExist
from django.utils.translation import gettext as _
@@ -54,4 +54,4 @@ def show_reference_template(request, template):
return render_to_response(template, context)
except TemplateDoesNotExist:
- return HttpResponseNotFound(f'Missing template {bleach.clean(template, strip=True)}')
+ return HttpResponseNotFound(f'Missing template {nh3.clean(template, strip_comments=True)}')
diff --git a/openedx/core/djangoapps/user_authn/views/logout.py b/openedx/core/djangoapps/user_authn/views/logout.py
index 13301f5e3bb1..616b792b9f22 100644
--- a/openedx/core/djangoapps/user_authn/views/logout.py
+++ b/openedx/core/djangoapps/user_authn/views/logout.py
@@ -5,7 +5,7 @@
import urllib.parse as parse # pylint: disable=import-error
from urllib.parse import parse_qs, urlsplit, urlunsplit # pylint: disable=import-error
-import bleach
+import nh3
from django.conf import settings
from django.contrib.auth import logout
from django.shortcuts import redirect
@@ -60,7 +60,7 @@ def target(self):
# >> /courses/course-v1:ARTS+D1+2018_T/course/
# to handle this scenario we need to encode our URL using quote_plus and then unquote it again.
if target_url:
- target_url = bleach.clean(parse.unquote(parse.quote_plus(target_url)))
+ target_url = nh3.clean(parse.unquote(parse.quote_plus(target_url)))
use_target_url = target_url and is_safe_login_or_logout_redirect(
redirect_to=target_url,
diff --git a/openedx/core/djangoapps/user_authn/views/tests/test_logout.py b/openedx/core/djangoapps/user_authn/views/tests/test_logout.py
index 7d10fe1021ef..c59969c2d00d 100644
--- a/openedx/core/djangoapps/user_authn/views/tests/test_logout.py
+++ b/openedx/core/djangoapps/user_authn/views/tests/test_logout.py
@@ -5,7 +5,7 @@
import urllib
from unittest import mock
import ddt
-import bleach
+import nh3
from django.conf import settings
from django.test import TestCase
from django.test.utils import override_settings
@@ -237,6 +237,6 @@ def test_logout_redirect_failure_with_xss_vulnerability(self, redirect_url, host
)
response = self.client.get(url, HTTP_HOST=host)
expected = {
- 'target': bleach.clean(urllib.parse.unquote(redirect_url)),
+ 'target': nh3.clean(urllib.parse.unquote(redirect_url)),
}
self.assertDictContainsSubset(expected, response.context_data)
diff --git a/openedx/core/djangolib/markup.py b/openedx/core/djangolib/markup.py
index 3009f1d53f34..2b22a3035044 100644
--- a/openedx/core/djangolib/markup.py
+++ b/openedx/core/djangolib/markup.py
@@ -4,7 +4,7 @@
import markupsafe
-import bleach
+import nh3
from lxml.html.clean import Cleaner
from mako.filters import decode
@@ -53,7 +53,7 @@ def strip_all_tags_but_br(string_to_strip):
string_to_strip = ""
string_to_strip = decode.utf8(string_to_strip)
- string_to_strip = bleach.clean(string_to_strip, tags={'br'}, strip=True)
+ string_to_strip = nh3.clean(string_to_strip, tags={'br'}, strip_comments=True)
return HTML(string_to_strip)
diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt
index b077658dbbad..bce0e43ba3a1 100644
--- a/requirements/edx/base.txt
+++ b/requirements/edx/base.txt
@@ -733,6 +733,8 @@ newrelic==9.3.0
# via
# -r requirements/edx/bundled.in
# edx-django-utils
+nh3==0.2.15
+ # via -r requirements/edx/kernel.in
nltk==3.8.1
# via chem
nodeenv==1.8.0
diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt
index 4ecb8930ff73..840ade96b6b9 100644
--- a/requirements/edx/development.txt
+++ b/requirements/edx/development.txt
@@ -1238,6 +1238,10 @@ newrelic==9.3.0
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
# edx-django-utils
+nh3==0.2.15
+ # via
+ # -r requirements/edx/doc.txt
+ # -r requirements/edx/testing.txt
nltk==3.8.1
# via
# -r requirements/edx/doc.txt
diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt
index bd6f40cb991f..4e3d0bfa84cb 100644
--- a/requirements/edx/doc.txt
+++ b/requirements/edx/doc.txt
@@ -870,6 +870,8 @@ newrelic==9.3.0
# via
# -r requirements/edx/base.txt
# edx-django-utils
+nh3==0.2.15
+ # via -r requirements/edx/base.txt
nltk==3.8.1
# via
# -r requirements/edx/base.txt
diff --git a/requirements/edx/kernel.in b/requirements/edx/kernel.in
index 744f1bd632ce..e360f607e89e 100644
--- a/requirements/edx/kernel.in
+++ b/requirements/edx/kernel.in
@@ -109,6 +109,7 @@ mako # Primary template language used for server-
Markdown # Convert text markup to HTML; used in capa problems, forums, and course wikis
mongoengine # Object-document mapper for MongoDB, used in the LMS dashboard
mysqlclient # Driver for the default production relational database
+nh3
nodeenv # Utility for managing Node.js environments; we use this for deployments and testing
oauthlib # OAuth specification support for authenticating via LTI or other Open edX services
olxcleaner
diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt
index 358fb7633904..aff856a319ec 100644
--- a/requirements/edx/testing.txt
+++ b/requirements/edx/testing.txt
@@ -930,6 +930,8 @@ newrelic==9.3.0
# via
# -r requirements/edx/base.txt
# edx-django-utils
+nh3==0.2.15
+ # via -r requirements/edx/base.txt
nltk==3.8.1
# via
# -r requirements/edx/base.txt
diff --git a/xmodule/capa/inputtypes.py b/xmodule/capa/inputtypes.py
index 5decad4a2ca8..8dff57768688 100644
--- a/xmodule/capa/inputtypes.py
+++ b/xmodule/capa/inputtypes.py
@@ -47,7 +47,7 @@
import time
from datetime import datetime
-import bleach
+import nh3
import html5lib
import pyparsing
import six
@@ -800,7 +800,7 @@ def setup_code_response_rendering(self):
if self.status == 'incomplete':
self.status = 'queued'
self.queue_len = self.msg # lint-amnesty, pylint: disable=attribute-defined-outside-init
- self.msg = bleach.clean(self.submitted_msg)
+ self.msg = nh3.clean(self.submitted_msg)
def setup(self):
""" setup this input type """
diff --git a/xmodule/capa/tests/test_util.py b/xmodule/capa/tests/test_util.py
index 38f488a2befd..2fb24ccd3682 100644
--- a/xmodule/capa/tests/test_util.py
+++ b/xmodule/capa/tests/test_util.py
@@ -121,7 +121,7 @@ def test_compare_with_tolerance(self): # lint-amnesty, pylint: disable=too-many
def test_sanitize_html(self):
"""
- Test for html sanitization with bleach.
+ Test for html sanitization with nh3.
"""
allowed_tags = ['div', 'p', 'audio', 'pre', 'span']
for tag in allowed_tags:
@@ -142,7 +142,7 @@ def test_get_inner_html_from_xpath(self):
def test_remove_markup(self):
"""
- Test for markup removal with bleach.
+ Test for markup removal with nh3.
"""
assert remove_markup('The Truth is Out There & you need to find it') ==\
'The Truth is Out There & you need to find it'
diff --git a/xmodule/capa/util.py b/xmodule/capa/util.py
index 5af2a0dd6e99..418abcf5ce81 100644
--- a/xmodule/capa/util.py
+++ b/xmodule/capa/util.py
@@ -8,11 +8,10 @@
from cmath import isinf, isnan
from decimal import Decimal
-import bleach
+import nh3
from calc import evaluator
from lxml import etree
-from bleach.css_sanitizer import CSSSanitizer
from openedx.core.djangolib.markup import HTML
#-----------------------------------------------------------------------------
@@ -182,17 +181,16 @@ def sanitize_html(html_code):
Used to sanitize XQueue responses from Matlab.
"""
- attributes = bleach.ALLOWED_ATTRIBUTES.copy()
+ attributes = nh3.ALLOWED_ATTRIBUTES.copy()
attributes.update({
- '*': ['class', 'style', 'id'],
- 'audio': ['controls', 'autobuffer', 'autoplay', 'src'],
- 'img': ['src', 'width', 'height', 'class']
+ '*': {'class', 'style', 'id'},
+ 'audio': {'controls', 'autobuffer', 'autoplay', 'src'},
+ 'img': {'src', 'width', 'height', 'class'}
})
- output = bleach.clean(
+ output = nh3.clean(
html_code,
- protocols=bleach.ALLOWED_PROTOCOLS | {'data'},
- tags=bleach.ALLOWED_TAGS | {'div', 'p', 'audio', 'pre', 'img', 'span'},
- css_sanitizer=CSSSanitizer(allowed_css_properties=["white-space"]),
+ # protocols=nh3.ALLOWED_PROTOCOLS | {'data'},
+ tags=nh3.ALLOWED_TAGS | {'div', 'p', 'audio', 'pre', 'img', 'span'},
attributes=attributes
)
return output
@@ -215,12 +213,12 @@ def remove_markup(html):
"""
Return html with markup stripped and text HTML-escaped.
- >>> bleach.clean("Rock & Roll", tags=set(), strip=True)
+ >>> nh3.clean("Rock & Roll", tags=set(), strip_comments=True)
'Rock & Roll'
- >>> bleach.clean("Rock & Roll", tags=set(), strip=True)
+ >>> nh3.clean("Rock & Roll", tags=set(), strip_comments=True)
'Rock & Roll'
"""
- return HTML(bleach.clean(html, tags=set(), strip=True))
+ return HTML(nh3.clean(html, tags=set(), strip_comments=True))
def get_course_id_from_capa_block(capa_block):
diff --git a/xmodule/capa_block.py b/xmodule/capa_block.py
index 7b58b5aa9ab8..0108a86a8e91 100644
--- a/xmodule/capa_block.py
+++ b/xmodule/capa_block.py
@@ -13,7 +13,7 @@
import sys
import traceback
-from bleach.sanitizer import Cleaner
+import nh3
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.utils.encoding import smart_str
@@ -557,7 +557,7 @@ def index_dictionary(self):
capa_content = re.sub(
r"(\s| |//)+",
" ",
- Cleaner(tags=[], strip=True).clean(capa_content)
+ nh3.clean(capa_content, tags=set())
)
capa_body = {
diff --git a/xmodule/library_content_block.py b/xmodule/library_content_block.py
index e4c4af267a1f..bd5dd23dff14 100644
--- a/xmodule/library_content_block.py
+++ b/xmodule/library_content_block.py
@@ -9,7 +9,7 @@
from copy import copy
from gettext import ngettext, gettext
-import bleach
+import nh3
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist, PermissionDenied
from django.utils.functional import classproperty
@@ -714,7 +714,7 @@ def source_library_values(self):
lib_tools = self.get_tools()
user_perms = self.runtime.service(self, 'studio_user_permissions')
all_libraries = [
- (key, bleach.clean(name)) for key, name in lib_tools.list_available_libraries()
+ (key, nh3.clean(name)) for key, name in lib_tools.list_available_libraries()
if user_perms.can_read(key) or self.source_library_id == str(key)
]
all_libraries.sort(key=lambda entry: entry[1]) # Sort by name
diff --git a/xmodule/lti_block.py b/xmodule/lti_block.py
index a463da79ef0b..9a5531601ba2 100644
--- a/xmodule/lti_block.py
+++ b/xmodule/lti_block.py
@@ -63,7 +63,7 @@
from unittest import mock
from urllib import parse
-import bleach
+import nh3
import oauthlib.oauth1
from django.conf import settings
from lxml import etree
@@ -468,7 +468,7 @@ def get_context(self):
# 'acronym': ['title'],
#
# This lets all plaintext through.
- sanitized_comment = bleach.clean(self.score_comment)
+ sanitized_comment = nh3.clean(self.score_comment)
return {
'input_fields': self.get_input_fields(),