diff --git a/src/main/java/org/jsoup/safety/Safelist.java b/src/main/java/org/jsoup/safety/Safelist.java
index cb5038d06d..c1a48edf26 100644
--- a/src/main/java/org/jsoup/safety/Safelist.java
+++ b/src/main/java/org/jsoup/safety/Safelist.java
@@ -12,12 +12,8 @@ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Element;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Set;
+import java.util.*;
+import java.util.regex.Pattern;
import static org.jsoup.internal.Normalizer.lowerCase;
@@ -67,11 +63,14 @@ XSS attack examples (that jsoup will safegaurd against the default Cleaner and S
*/
public class Safelist {
private static final String All = ":all";
+ private static final TagName AllTagName = new TagName(All);
+
private final Set tagNames; // tags allowed, lower case. e.g. [p, br, span]
private final Map> attributes; // tag -> attribute[]. allowed attributes [href] for a tag.
private final Map> enforcedAttributes; // always set these attribute values
private final Map>> protocols; // allowed URL protocols for attributes
private boolean preserveRelativeLinks; // option to preserve relative links
+ private Map> wildcardAttributes = new LinkedHashMap<>();
/**
This safelist allows only text nodes: any HTML Element or any Node other than a TextNode will be removed.
@@ -237,6 +236,12 @@ public Safelist(Safelist copy) {
protocols.put(protocolsEntry.getKey(), attributeProtocolsCopy);
}
preserveRelativeLinks = copy.preserveRelativeLinks;
+
+ // create deep-ish copy. (The 'Pattern' is not deep-copied.)
+ wildcardAttributes = new LinkedHashMap<>(copy.wildcardAttributes.size());
+ for (Map.Entry> entry : copy.wildcardAttributes.entrySet()) {
+ wildcardAttributes.put(entry.getKey(), new LinkedHashMap<>(entry.getValue()));
+ }
}
/**
@@ -274,6 +279,7 @@ public Safelist removeTags(String... tags) {
attributes.remove(tagName);
enforcedAttributes.remove(tagName);
protocols.remove(tagName);
+ wildcardAttributes.remove(tagName);
}
}
return this;
@@ -408,6 +414,93 @@ public Safelist removeEnforcedAttribute(String tag, String attribute) {
return this;
}
+ /**
+  * Add wildcard (regular expression) attribute name patterns for a tag. An attribute on that tag is treated as
+  * safe when its whole name matches one of the registered patterns; matching is case-insensitive.
+  *
+  * Each wildcard must be a regular expression understood by java.util.regex.Pattern. Patterns accumulate across
+  * calls: every distinct wildcard string is kept, and re-adding the same wildcard string simply replaces its
+  * compiled pattern.
+  *
+  * Examples:
+  *   {@code data-.+} - HTML 5 data attributes
+  *   {@code aria-.+} - ARIA attributes
+  *
+  * @param tag The tag the attributes are for.
+  * @param wildcards wildcard patterns recognized by java.util.regex.Pattern
+  * @return this Safelist, for chaining.
+  * @throws java.util.regex.PatternSyntaxException if a wildcard is not a valid regular expression
+  */
+ public Safelist addWildcardAttributes(String tag, String... wildcards) {
+     TagName tagName = TagName.valueOf(tag);
+     for (String wildcard : wildcards) {
+         // Compile first: if the expression is invalid we must not leave an empty entry behind for the tag.
+         // The non-capturing group keeps the anchors applied to the whole expression, even with alternation.
+         Pattern pattern = Pattern.compile("^(?:" + wildcard + ")$",
+             Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
+
+         Map<String, Pattern> patterns = wildcardAttributes.get(tagName);
+         if (patterns == null) {
+             patterns = new LinkedHashMap<>();
+             wildcardAttributes.put(tagName, patterns);
+         }
+         patterns.put(wildcard, pattern);
+     }
+
+     return this;
+ }
+
+ /**
+  * Remove previously added wildcard attribute patterns from a tag. Wildcards are identified by the exact pattern
+  * string that was used when they were added. Once a tag has no wildcard patterns left, its entry is dropped.
+  *
+  * @param tag The tag the attributes are for.
+  * @param wildcards wildcard patterns (as originally supplied) to remove
+  * @return this Safelist, for chaining.
+  */
+ public Safelist removeWildcardAttributes(String tag, String... wildcards) {
+     final TagName key = TagName.valueOf(tag);
+     for (String expression : wildcards) {
+         final Map<String, Pattern> registered = wildcardAttributes.get(key);
+         if (registered == null) {
+             continue; // nothing (left) registered for this tag
+         }
+
+         registered.remove(expression);
+
+         // remove any empty entries
+         if (registered.isEmpty()) {
+             wildcardAttributes.remove(key);
+         }
+     }
+
+     return this;
+ }
+
+ /**
+  * Add wildcard (regular expression) attribute name patterns that apply to every tag. This registers the
+  * patterns under the special ":all" tag.
+  *
+  * Each wildcard must be a regular expression understood by java.util.regex.Pattern. Patterns accumulate across
+  * calls; adding the same wildcard string again replaces its earlier registration.
+  *
+  * Examples:
+  *   {@code data-.+} - HTML 5 data attributes
+  *   {@code aria-.+} - ARIA attributes
+  *
+  * @param wildcards wildcard patterns recognized by java.util.regex.Pattern
+  * @return this Safelist, for chaining.
+  */
+ public Safelist addWildcardGlobalAttributes(String... wildcards) {
+     final Safelist chained = addWildcardAttributes(All, wildcards);
+     return chained;
+ }
+
+ /**
+  * Remove wildcard global attributes, i.e. patterns registered under the special ":all" tag.
+  *
+  * Accepts any number of wildcards, mirroring the add method and the per-tag remove method. Each wildcard is
+  * identified by the exact pattern string that was used when it was added.
+  *
+  * @param wildcards wildcard patterns (as originally supplied) to remove
+  * @return this Safelist, for chaining.
+  */
+ public Safelist removeWildcardGlobalAttributes(String... wildcards) {
+     return removeWildcardAttributes(All, wildcards);
+ }
+
/**
* Configure this Safelist to preserve relative links in an element's URL attribute, or convert them to absolute
* links. By default, this is false: URLs will be made absolute (e.g. start with an allowed protocol, like
@@ -541,6 +634,22 @@ public boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
return expect.getIgnoreCase(attrKey).equals(attr.getValue());
}
}
+ // might be a wildcard, e.g., "data-.+"?
+ if (wildcardAttributes.containsKey(tag)) {
+ for (Pattern pattern : wildcardAttributes.get(tag).values()) {
+ if (pattern.matcher(attr.getKey()).matches()) {
+ return true;
+ }
+ }
+ }
+ // might be a global wildcard, e.g., "data-.+"?
+ if (wildcardAttributes.containsKey(AllTagName)) {
+ for (Pattern pattern : wildcardAttributes.get(AllTagName).values()) {
+ if (pattern.matcher(attr.getKey()).matches()) {
+ return true;
+ }
+ }
+ }
// no attributes defined for tag, try :all tag
return !tagName.equals(All) && isSafeAttribute(All, el, attr);
}
diff --git a/src/test/java/org/jsoup/safety/SafelistTest.java b/src/test/java/org/jsoup/safety/SafelistTest.java
index 796ddc7225..03c58fd255 100644
--- a/src/test/java/org/jsoup/safety/SafelistTest.java
+++ b/src/test/java/org/jsoup/safety/SafelistTest.java
@@ -12,6 +12,7 @@
public class SafelistTest {
private static final String TEST_TAG = "testTag";
private static final String TEST_ATTRIBUTE = "testAttribute";
+ private static final String TEST_DATA_ATTRIBUTE = "data-" + TEST_ATTRIBUTE;
private static final String TEST_SCHEME = "valid-scheme";
private static final String TEST_VALUE = TEST_SCHEME + "://testValue";
@@ -75,5 +76,25 @@ void noscriptIsBlocked() {
assertNull(safelist);
}
+ @Test
+ public void testAttributeWildcard() {
+     // safelist1 allows nothing; safelist2 is a copy that additionally allows data-* attributes on TEST_TAG only.
+     Safelist safelist1 = Safelist.none();
+     Safelist safelist2 = new Safelist(safelist1).addWildcardAttributes(TEST_TAG, "data-.+");
+     Attribute attr = new Attribute(TEST_DATA_ATTRIBUTE, TEST_VALUE);
+
+     assertFalse(safelist1.isSafeAttribute(TEST_TAG, null, attr));
+     assertTrue(safelist2.isSafeAttribute(TEST_TAG, null, attr));
+     // A tag-scoped wildcard must not leak to other tags; check the safelist that actually has the wildcard.
+     assertFalse(safelist2.isSafeAttribute(TEST_TAG + "1", null, attr));
+ }
+ @Test
+ public void test8GlobalAttributeWildcard() {
+     // Baseline allows nothing; the copy additionally allows data-* attributes on every tag.
+     Safelist baseline = Safelist.none();
+     Safelist withGlobalWildcard = new Safelist(baseline);
+     withGlobalWildcard.addWildcardGlobalAttributes("data-.+");
+     Attribute dataAttribute = new Attribute(TEST_DATA_ATTRIBUTE, TEST_VALUE);
+     String otherTag = TEST_TAG + "1";
+
+     assertFalse(baseline.isSafeAttribute(TEST_TAG, null, dataAttribute));
+     assertTrue(withGlobalWildcard.isSafeAttribute(TEST_TAG, null, dataAttribute));
+     assertTrue(withGlobalWildcard.isSafeAttribute(otherTag, null, dataAttribute));
+ }
}