From b5f4b41f82f7b4975ffa57090d35478be0c15248 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Fri, 14 Jan 2022 11:08:55 +0100 Subject: [PATCH 1/4] XCOMMONS-2349: Improve performance of AttributeFilter in HTMLCleaner * Store the XPathExpression in a thread-local variable. Note: this will lead to memory leaks if threads are re-used for other applications after stopping XWiki. --- .../internal/html/filter/AttributeFilter.java | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java b/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java index f2eed9a7cd..530291862c 100644 --- a/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java +++ b/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java @@ -27,6 +27,7 @@ import javax.inject.Singleton; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; @@ -68,13 +69,30 @@ public class AttributeFilter extends AbstractHTMLFilter */ private static final String VERTICAL_ALIGN = "vertical-align"; + private final ThreadLocal attributeMatcher = ThreadLocal.withInitial(() -> { + StringBuilder xpathExpression = new StringBuilder(); + for (String attributeName : ATTRIBUTE_TO_CSS_PROPERTY.keySet()) { + if (xpathExpression.length() > 0) { + xpathExpression.append('|'); + } + xpathExpression.append("//@").append(attributeName); + } + + XPath xpath = XPathFactory.newInstance().newXPath(); + try { + return xpath.compile(xpathExpression.toString()); + } catch (XPathExpressionException e) { + return null; + } + }); + /** * The logger. */ @Inject private Logger logger; - { + static { ATTRIBUTE_TO_CSS_PROPERTY.put("align", "text-align"); ATTRIBUTE_TO_CSS_PROPERTY.put("valign", VERTICAL_ALIGN); ATTRIBUTE_TO_CSS_PROPERTY.put("bgcolor", "background-color"); @@ -83,18 +101,10 @@ public class AttributeFilter extends AbstractHTMLFilter @Override public void filter(Document document, Map cleaningParameters) { - StringBuilder xpathExpression = new StringBuilder(); - for (String attributeName : ATTRIBUTE_TO_CSS_PROPERTY.keySet()) { - if (xpathExpression.length() > 0) { - xpathExpression.append('|'); - } - xpathExpression.append("//@").append(attributeName); - } + NodeList attributes; - NodeList attributes = null; - XPath xpath = XPathFactory.newInstance().newXPath(); try { - attributes = (NodeList) xpath.evaluate(xpathExpression.toString(), document, XPathConstants.NODESET); + attributes = (NodeList) attributeMatcher.get().evaluate(document, XPathConstants.NODESET); } catch (XPathExpressionException e) { // Shouldn't happen. this.logger.error("Failed to apply the HTML attribute cleaning filter.", e); From b913a9bda103b7905623a6b2307c2a454595b90f Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Fri, 14 Jan 2022 12:03:12 +0100 Subject: [PATCH 2/4] XCOMMONS-2349: Improve performance of AttributeFilter in HTMLCleaner * Store the XPathFactory in the execution context. --- .../internal/html/filter/AttributeFilter.java | 56 ++++++++++++------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java b/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java index 530291862c..e0e2281912 100644 --- a/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java +++ b/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java @@ -27,7 +27,6 @@ import javax.inject.Singleton; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; -import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; @@ -37,6 +36,8 @@ import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xwiki.component.annotation.Component; +import org.xwiki.context.Execution; +import org.xwiki.context.ExecutionContext; import org.xwiki.xml.html.HTMLConstants; import org.xwiki.xml.html.filter.AbstractHTMLFilter; @@ -69,29 +70,15 @@ public class AttributeFilter extends AbstractHTMLFilter */ private static final String VERTICAL_ALIGN = "vertical-align"; - private final ThreadLocal attributeMatcher = ThreadLocal.withInitial(() -> { - StringBuilder xpathExpression = new StringBuilder(); - for (String attributeName : ATTRIBUTE_TO_CSS_PROPERTY.keySet()) { - if (xpathExpression.length() > 0) { - xpathExpression.append('|'); - } - xpathExpression.append("//@").append(attributeName); - } - - XPath xpath = XPathFactory.newInstance().newXPath(); - try { - return xpath.compile(xpathExpression.toString()); - } catch (XPathExpressionException e) { - return null; - } - }); - /** * The logger. */ @Inject private Logger logger; + @Inject + private Execution execution; + static { ATTRIBUTE_TO_CSS_PROPERTY.put("align", "text-align"); ATTRIBUTE_TO_CSS_PROPERTY.put("valign", VERTICAL_ALIGN); @@ -101,10 +88,39 @@ public class AttributeFilter extends AbstractHTMLFilter @Override public void filter(Document document, Map cleaningParameters) { - NodeList attributes; + ExecutionContext executionContext = this.execution.getContext(); + XPathFactory xPathFactory; + + if (executionContext != null) { + xPathFactory = (XPathFactory) executionContext.getProperty(XPathFactory.class.getName()); + if (xPathFactory == null) { + synchronized (XPathFactory.class) { + xPathFactory = XPathFactory.newInstance(); + } + executionContext.newProperty(XPathFactory.class.getName()).type(XPathFactory.class).inherited() + .nonNull().initial(xPathFactory).makeFinal().declare(); + } + } else { + synchronized (XPathFactory.class) { + xPathFactory = XPathFactory.newInstance(); + } + } + + + StringBuilder xpathExpression = new StringBuilder(); + for (String attributeName : ATTRIBUTE_TO_CSS_PROPERTY.keySet()) { + if (xpathExpression.length() > 0) { + xpathExpression.append('|'); + } + xpathExpression.append("//@").append(attributeName); + } + + XPath xpath = xPathFactory.newXPath(); + + NodeList attributes; try { - attributes = (NodeList) attributeMatcher.get().evaluate(document, XPathConstants.NODESET); + attributes = (NodeList) xpath.evaluate(xpathExpression.toString(), document, XPathConstants.NODESET); } catch (XPathExpressionException e) { // Shouldn't happen. this.logger.error("Failed to apply the HTML attribute cleaning filter.", e); From 47e0887278b4956a619330ae0d13618bd54f625f Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Fri, 14 Jan 2022 15:20:14 +0100 Subject: [PATCH 3/4] XCOMMONS-2349: Improve performance of AttributeFilter in HTMLCleaner * Do not protect the call to XPathFactory.newInstance() as it seems thread-safe after all. --- .../xwiki/xml/internal/html/filter/AttributeFilter.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java b/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java index e0e2281912..cf6ebb39e1 100644 --- a/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java +++ b/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java @@ -95,16 +95,12 @@ public void filter(Document document, Map cleaningParameters) xPathFactory = (XPathFactory) executionContext.getProperty(XPathFactory.class.getName()); if (xPathFactory == null) { - synchronized (XPathFactory.class) { - xPathFactory = XPathFactory.newInstance(); - } + xPathFactory = XPathFactory.newInstance(); executionContext.newProperty(XPathFactory.class.getName()).type(XPathFactory.class).inherited() .nonNull().initial(xPathFactory).makeFinal().declare(); } } else { - synchronized (XPathFactory.class) { - xPathFactory = XPathFactory.newInstance(); - } + xPathFactory = XPathFactory.newInstance(); } From 2ff4e3a1456f52c0371560d9b4a1565a02360da6 Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Mon, 13 Jun 2022 15:09:02 +0200 Subject: [PATCH 4/4] XCOMMONS-2349: Improve performance of AttributeFilter in HTMLCleaner * Replace XPath by simple loop over all elements. * Make sure that replaced attributes are sorted lexicographically to guarantee deterministic results. --- .../internal/html/filter/AttributeFilter.java | 71 +++++-------------- 1 file changed, 18 insertions(+), 53 deletions(-) diff --git a/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java b/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java index cf6ebb39e1..1acc78d7b2 100644 --- a/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java +++ b/xwiki-commons-core/xwiki-commons-xml/src/main/java/org/xwiki/xml/internal/html/filter/AttributeFilter.java @@ -19,25 +19,18 @@ */ package org.xwiki.xml.internal.html.filter; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.Map; -import javax.inject.Inject; import javax.inject.Named; import javax.inject.Singleton; -import javax.xml.xpath.XPath; -import javax.xml.xpath.XPathConstants; -import javax.xml.xpath.XPathExpressionException; -import javax.xml.xpath.XPathFactory; -import org.slf4j.Logger; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.Element; +import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xwiki.component.annotation.Component; -import org.xwiki.context.Execution; -import org.xwiki.context.ExecutionContext; import org.xwiki.xml.html.HTMLConstants; import org.xwiki.xml.html.filter.AbstractHTMLFilter; @@ -63,68 +56,40 @@ public class AttributeFilter extends AbstractHTMLFilter /** * The map between HTML attribute names and the corresponding CSS property name. */ - private static final Map ATTRIBUTE_TO_CSS_PROPERTY = new HashMap<>(); + private static final Map ATTRIBUTE_TO_CSS_PROPERTY = new LinkedHashMap<>(); /** * The 'vertical-align' CSS property. */ private static final String VERTICAL_ALIGN = "vertical-align"; - /** - * The logger. - */ - @Inject - private Logger logger; - - @Inject - private Execution execution; - static { ATTRIBUTE_TO_CSS_PROPERTY.put("align", "text-align"); - ATTRIBUTE_TO_CSS_PROPERTY.put("valign", VERTICAL_ALIGN); ATTRIBUTE_TO_CSS_PROPERTY.put("bgcolor", "background-color"); + ATTRIBUTE_TO_CSS_PROPERTY.put("valign", VERTICAL_ALIGN); } @Override public void filter(Document document, Map cleaningParameters) { - ExecutionContext executionContext = this.execution.getContext(); - XPathFactory xPathFactory; - - if (executionContext != null) { - xPathFactory = (XPathFactory) executionContext.getProperty(XPathFactory.class.getName()); - - if (xPathFactory == null) { - xPathFactory = XPathFactory.newInstance(); - executionContext.newProperty(XPathFactory.class.getName()).type(XPathFactory.class).inherited() - .nonNull().initial(xPathFactory).makeFinal().declare(); + NodeList nodeList = document.getElementsByTagName("*"); + for (int i = 0, len = nodeList.getLength(); i < len; i++) { + Node node = nodeList.item(i); + if (node instanceof Element) { + filterElement((Element) node); } - } else { - xPathFactory = XPathFactory.newInstance(); } + } - - StringBuilder xpathExpression = new StringBuilder(); - for (String attributeName : ATTRIBUTE_TO_CSS_PROPERTY.keySet()) { - if (xpathExpression.length() > 0) { - xpathExpression.append('|'); + private void filterElement(Element element) + { + if (element.hasAttributes()) { + for (String attrName : ATTRIBUTE_TO_CSS_PROPERTY.keySet()) { + Attr attribute = element.getAttributeNode(attrName); + if (attribute != null) { + filterAttribute(attribute); + } } - xpathExpression.append("//@").append(attributeName); - } - - XPath xpath = xPathFactory.newXPath(); - - NodeList attributes; - try { - attributes = (NodeList) xpath.evaluate(xpathExpression.toString(), document, XPathConstants.NODESET); - } catch (XPathExpressionException e) { - // Shouldn't happen. - this.logger.error("Failed to apply the HTML attribute cleaning filter.", e); - return; - } - - for (int i = 0; i < attributes.getLength(); i++) { - filterAttribute((Attr) attributes.item(i)); } }