diff --git a/appveyor.yml b/appveyor.yml
index 969a302..b869394 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,4 +1,4 @@
-version: 8.1.{build}
+version: 9.0.{build}
skip_tags: true
image: Visual Studio 2022
environment:
@@ -19,7 +19,7 @@ test_script:
dotnet sonarscanner begin /k:"mganss_HtmlSanitizer" /v:$env:APPVEYOR_BUILD_VERSION /o:"mganss-github" /d:sonar.host.url="https://sonarcloud.io" /d:sonar.login="$env:sonar_token" /d:sonar.cs.opencover.reportsPaths="$($env:APPVEYOR_BUILD_FOLDER)\coverage.xml" /d:sonar.coverage.exclusions="**/Program.cs"
dotnet build
}
- - dotnet test /p:CollectCoverage=true test\HtmlSanitizer.Tests\HtmlSanitizer.Tests.csproj -f net6.0
+ - dotnet test /p:CollectCoverage=true test\HtmlSanitizer.Tests\HtmlSanitizer.Tests.csproj -f net8.0
- ps: cp coverage.*.xml ./coverage.xml
- ps: |
if (-not $env:APPVEYOR_PULL_REQUEST_NUMBER) {
diff --git a/src/HtmlSanitizer/EventArgs.cs b/src/HtmlSanitizer/EventArgs.cs
index a36c769..8f2ba99 100644
--- a/src/HtmlSanitizer/EventArgs.cs
+++ b/src/HtmlSanitizer/EventArgs.cs
@@ -5,271 +5,270 @@
using System.Collections.Generic;
using System.ComponentModel;
-namespace Ganss.Xss
+namespace Ganss.Xss;
+
+///
+/// Provides data for the event.
+///
+///
+/// Initializes a new instance of the class.
+///
+public class PostProcessDomEventArgs(IHtmlDocument document) : EventArgs
{
///
- /// Provides data for the event.
+ /// Gets the document.
///
- ///
- /// Initializes a new instance of the class.
- ///
- public class PostProcessDomEventArgs(IHtmlDocument document) : EventArgs
- {
- ///
- /// Gets the document.
- ///
- ///
- /// The document.
- ///
- public IHtmlDocument Document { get; private set; } = document;
- }
+ ///
+ /// The document.
+ ///
+ public IHtmlDocument Document { get; private set; } = document;
+}
+///
+/// Provides data for the event.
+///
+///
+/// Initializes a new instance of the class.
+///
+public class PostProcessNodeEventArgs(IHtmlDocument document, INode node) : EventArgs
+{
///
- /// Provides data for the event.
+ /// Gets the document.
///
- ///
- /// Initializes a new instance of the class.
- ///
- public class PostProcessNodeEventArgs(IHtmlDocument document, INode node) : EventArgs
- {
- ///
- /// Gets the document.
- ///
- ///
- /// The document.
- ///
- public IHtmlDocument Document { get; private set; } = document;
+ ///
+ /// The document.
+ ///
+ public IHtmlDocument Document { get; private set; } = document;
- ///
- /// Gets the DOM node to be processed.
- ///
- ///
- /// The DOM node.
- ///
- public INode Node { get; private set; } = node;
+ ///
+ /// Gets the DOM node to be processed.
+ ///
+ ///
+ /// The DOM node.
+ ///
+ public INode Node { get; private set; } = node;
- ///
- /// Gets the replacement nodes. Leave empty if no replacement should occur.
- ///
- ///
- /// The replacement nodes.
- ///
- public ICollection ReplacementNodes { get; private set; } = new List();
- }
+ ///
+ /// Gets the replacement nodes. Leave empty if no replacement should occur.
+ ///
+ ///
+ /// The replacement nodes.
+ ///
+ public ICollection<INode> ReplacementNodes { get; private set; } = [];
+}
+///
+/// Provides data for the event.
+///
+///
+/// Initializes a new instance of the class.
+///
+/// The element to be removed.
+/// The reason why the tag will be removed.
+public class RemovingTagEventArgs(IElement tag, RemoveReason reason) : CancelEventArgs
+{
///
- /// Provides data for the event.
+ /// Gets the tag to be removed.
///
- ///
- /// Initializes a new instance of the class.
- ///
- /// The element to be removed.
- /// The reason why the tag will be removed.
- public class RemovingTagEventArgs(IElement tag, RemoveReason reason) : CancelEventArgs
- {
- ///
- /// Gets the tag to be removed.
- ///
- ///
- /// The tag.
- ///
- public IElement Tag { get; private set; } = tag;
+ ///
+ /// The tag.
+ ///
+ public IElement Tag { get; private set; } = tag;
- ///
- /// Gets the reason why the tag will be removed.
- ///
- ///
- /// The reason.
- ///
- public RemoveReason Reason { get; private set; } = reason;
- }
+ ///
+ /// Gets the reason why the tag will be removed.
+ ///
+ ///
+ /// The reason.
+ ///
+ public RemoveReason Reason { get; private set; } = reason;
+}
+///
+/// Provides data for the event.
+///
+///
+/// Initializes a new instance of the class.
+///
+/// The element containing the attribute.
+/// The attribute to be removed.
+/// The reason why the attribute will be removed.
+public class RemovingAttributeEventArgs(IElement tag, IAttr attribute, RemoveReason reason) : CancelEventArgs
+{
///
- /// Provides data for the event.
+ /// Gets the tag containing the attribute to be removed.
///
- ///
- /// Initializes a new instance of the class.
- ///
- /// The element containing the attribute.
- /// The attribute to be removed.
- /// The reason why the attribute will be removed.
- public class RemovingAttributeEventArgs(IElement tag, IAttr attribute, RemoveReason reason) : CancelEventArgs
- {
- ///
- /// Gets the tag containing the attribute to be removed.
- ///
- ///
- /// The tag.
- ///
- public IElement Tag { get; private set; } = tag;
+ ///
+ /// The tag.
+ ///
+ public IElement Tag { get; private set; } = tag;
- ///
- /// Gets the attribute to be removed.
- ///
- ///
- /// The attribute.
- ///
- public IAttr Attribute { get; private set; } = attribute;
+ ///
+ /// Gets the attribute to be removed.
+ ///
+ ///
+ /// The attribute.
+ ///
+ public IAttr Attribute { get; private set; } = attribute;
- ///
- /// Gets the reason why the attribute will be removed.
- ///
- ///
- /// The reason.
- ///
- public RemoveReason Reason { get; private set; } = reason;
- }
+ ///
+ /// Gets the reason why the attribute will be removed.
+ ///
+ ///
+ /// The reason.
+ ///
+ public RemoveReason Reason { get; private set; } = reason;
+}
+///
+/// Provides data for the event.
+///
+///
+/// Initializes a new instance of the class.
+///
+/// The element containing the style.
+/// The style to be removed.
+/// The reason why the style will be removed.
+public class RemovingStyleEventArgs(IElement tag, ICssProperty style, RemoveReason reason) : CancelEventArgs
+{
///
- /// Provides data for the event.
+ /// Gets the tag containing the style to be removed.
///
- ///
- /// Initializes a new instance of the class.
- ///
- /// The element containing the attribute.
- /// The style to be removed.
- /// The reason why the attribute will be removed.
- public class RemovingStyleEventArgs(IElement tag, ICssProperty style, RemoveReason reason) : CancelEventArgs
- {
- ///
- /// Gets the tag containing the style to be removed.
- ///
- ///
- /// The tag.
- ///
- public IElement Tag { get; private set; } = tag;
+ ///
+ /// The tag.
+ ///
+ public IElement Tag { get; private set; } = tag;
- ///
- /// Gets the style to be removed.
- ///
- ///
- /// The style.
- ///
- public ICssProperty Style { get; private set; } = style;
+ ///
+ /// Gets the style to be removed.
+ ///
+ ///
+ /// The style.
+ ///
+ public ICssProperty Style { get; private set; } = style;
- ///
- /// Gets the reason why the style will be removed.
- ///
- ///
- /// The reason.
- ///
- public RemoveReason Reason { get; private set; } = reason;
- }
+ ///
+ /// Gets the reason why the style will be removed.
+ ///
+ ///
+ /// The reason.
+ ///
+ public RemoveReason Reason { get; private set; } = reason;
+}
+///
+/// Provides data for the event.
+///
+///
+/// Initializes a new instance of the class.
+///
+/// The element containing the at-rule.
+/// The rule to be removed.
+public class RemovingAtRuleEventArgs(IElement tag, ICssRule rule) : CancelEventArgs
+{
///
- /// Provides data for the event.
+ /// Gets the tag containing the at-rule to be removed.
///
- ///
- /// Initializes a new instance of the class.
- ///
- /// The element containing the attribute.
- /// The rule to be removed.
- public class RemovingAtRuleEventArgs(IElement tag, ICssRule rule) : CancelEventArgs
- {
- ///
- /// Gets the tag containing the at-rule to be removed.
- ///
- ///
- /// The tag.
- ///
- public IElement Tag { get; private set; } = tag;
+ ///
+ /// The tag.
+ ///
+ public IElement Tag { get; private set; } = tag;
- ///
- /// Gets the rule to be removed.
- ///
- ///
- /// The rule.
- ///
- public ICssRule Rule { get; private set; } = rule;
- }
+ ///
+ /// Gets the rule to be removed.
+ ///
+ ///
+ /// The rule.
+ ///
+ public ICssRule Rule { get; private set; } = rule;
+}
+///
+/// Provides data for the event.
+///
+///
+/// Initializes a new instance of the class.
+///
+/// The comment to be removed.
+public class RemovingCommentEventArgs(IComment comment) : CancelEventArgs
+{
///
- /// Provides data for the event.
+ /// Gets the comment node to be removed.
///
- ///
- /// Initializes a new instance of the class.
- ///
- /// The comment to be removed.
- public class RemovingCommentEventArgs(IComment comment) : CancelEventArgs
- {
- ///
- /// Gets the comment node to be removed.
- ///
- ///
- /// The comment node.
- ///
- public IComment Comment { get; private set; } = comment;
- }
+ ///
+ /// The comment node.
+ ///
+ public IComment Comment { get; private set; } = comment;
+}
+///
+/// Provides data for the event.
+///
+///
+/// Initializes a new instance of the class.
+///
+/// The element containing the CSS class.
+/// The CSS class to be removed.
+/// The reason why the CSS class will be removed.
+public class RemovingCssClassEventArgs(IElement tag, string cssClass, RemoveReason reason) : CancelEventArgs
+{
///
- /// Provides data for the event.
+ /// Gets the tag containing the CSS class to be removed.
///
- ///
- /// Initializes a new instance of the class.
- ///
- /// The element containing the attribute.
- /// The CSS class to be removed.
- /// The reason why the attribute will be removed.
- public class RemovingCssClassEventArgs(IElement tag, string cssClass, RemoveReason reason) : CancelEventArgs
- {
- ///
- /// Gets the tag containing the CSS class to be removed.
- ///
- ///
- /// The tag.
- ///
- public IElement Tag { get; private set; } = tag;
+ ///
+ /// The tag.
+ ///
+ public IElement Tag { get; private set; } = tag;
- ///
- /// Gets the CSS class to be removed.
- ///
- ///
- /// The CSS class.
- ///
- public string CssClass { get; private set; } = cssClass;
+ ///
+ /// Gets the CSS class to be removed.
+ ///
+ ///
+ /// The CSS class.
+ ///
+ public string CssClass { get; private set; } = cssClass;
- ///
- /// Gets the reason why the CSS class will be removed.
- ///
- ///
- /// The reason.
- ///
- public RemoveReason Reason { get; private set; } = reason;
- }
+ ///
+ /// Gets the reason why the CSS class will be removed.
+ ///
+ ///
+ /// The reason.
+ ///
+ public RemoveReason Reason { get; private set; } = reason;
+}
+///
+/// Provides data for the event.
+///
+///
+/// Initializes a new instance of the class.
+///
+/// The tag containing the URI being sanitized.
+/// The original URL.
+/// The sanitized URL.
+public class FilterUrlEventArgs(IElement tag, string originalUrl, string? sanitizedUrl = null) : EventArgs
+{
///
- /// Provides data for the event.
+ /// Gets the original URL.
///
- ///
- /// Initializes a new instance of the class.
- ///
- /// The tag containing the URI being sanitized.
- /// The original URL.
- /// The sanitized URL.
- public class FilterUrlEventArgs(IElement tag, string originalUrl, string? sanitizedUrl = null) : EventArgs
- {
- ///
- /// Gets the original URL.
- ///
- ///
- /// The original URL.
- ///
- public string OriginalUrl { get; private set; } = originalUrl;
+ ///
+ /// The original URL.
+ ///
+ public string OriginalUrl { get; private set; } = originalUrl;
- ///
- /// Gets or sets the sanitized URL.
- ///
- ///
- /// The sanitized URL. If it is null, it will be removed.
- ///
- public string? SanitizedUrl { get; set; } = sanitizedUrl;
+ ///
+ /// Gets or sets the sanitized URL.
+ ///
+ ///
+ /// The sanitized URL. If it is null, it will be removed.
+ ///
+ public string? SanitizedUrl { get; set; } = sanitizedUrl;
- ///
- /// Gets the tag containing the URI being sanitized.
- ///
- ///
- /// The tag.
- ///
- public IElement Tag { get; private set; } = tag;
- }
+ ///
+ /// Gets the tag containing the URI being sanitized.
+ ///
+ ///
+ /// The tag.
+ ///
+ public IElement Tag { get; private set; } = tag;
}
diff --git a/src/HtmlSanitizer/HtmlFormatter.cs b/src/HtmlSanitizer/HtmlFormatter.cs
index 425c594..7c0a55f 100644
--- a/src/HtmlSanitizer/HtmlFormatter.cs
+++ b/src/HtmlSanitizer/HtmlFormatter.cs
@@ -1,74 +1,69 @@
-using AngleSharp;
-using AngleSharp.Html;
+using AngleSharp.Html;
using AngleSharp.Dom;
using System;
-using System.Collections.Generic;
-using System.Linq;
using System.Text;
-using System.Threading.Tasks;
-namespace Ganss.Xss
+namespace Ganss.Xss;
+
+///
+/// HTML5 markup formatter. Identical to <see cref="HtmlMarkupFormatter"/> except for &lt; and &gt; which are
+/// encoded in attribute values.
+///
+public class HtmlFormatter: HtmlMarkupFormatter
{
///
- /// HTML5 markup formatter. Identical to except for < and > which are
- /// encoded in attribute values.
+ /// An instance of .
///
- public class HtmlFormatter: HtmlMarkupFormatter
- {
- ///
- /// An instance of .
- ///
- new public static readonly HtmlFormatter Instance = new ();
+ new public static readonly HtmlFormatter Instance = new ();
- // disable XML comments warnings
- #pragma warning disable 1591
+ // disable XML comments warnings
+ #pragma warning disable 1591
- protected override string Attribute(IAttr attr)
- {
- var namespaceUri = attr.NamespaceUri;
- var localName = attr.LocalName;
- var value = attr.Value;
- var temp = new StringBuilder();
+ protected override string Attribute(IAttr attr)
+ {
+ var namespaceUri = attr.NamespaceUri;
+ var localName = attr.LocalName;
+ var value = attr.Value;
+ var temp = new StringBuilder();
- if (String.IsNullOrEmpty(namespaceUri))
- {
- temp.Append(localName);
- }
- else if (namespaceUri == NamespaceNames.XmlUri)
- {
- temp.Append(NamespaceNames.XmlPrefix).Append(':').Append(localName);
- }
- else if (namespaceUri == NamespaceNames.XLinkUri)
- {
- temp.Append(NamespaceNames.XLinkPrefix).Append(':').Append(localName);
- }
- else if (namespaceUri == NamespaceNames.XmlNsUri)
- {
- temp.Append(XmlNamespaceLocalName(localName));
- }
- else
- {
- temp.Append(attr.Name);
- }
+ if (String.IsNullOrEmpty(namespaceUri))
+ {
+ temp.Append(localName);
+ }
+ else if (namespaceUri == NamespaceNames.XmlUri)
+ {
+ temp.Append(NamespaceNames.XmlPrefix).Append(':').Append(localName);
+ }
+ else if (namespaceUri == NamespaceNames.XLinkUri)
+ {
+ temp.Append(NamespaceNames.XLinkPrefix).Append(':').Append(localName);
+ }
+ else if (namespaceUri == NamespaceNames.XmlNsUri)
+ {
+ temp.Append(XmlNamespaceLocalName(localName));
+ }
+ else
+ {
+ temp.Append(attr.Name);
+ }
- temp.Append('=').Append('"');
+ temp.Append('=').Append('"');
- for (var i = 0; i < value.Length; i++)
+ for (var i = 0; i < value.Length; i++)
+ {
+ switch (value[i])
{
- switch (value[i])
- {
- case '&': temp.Append("&amp;"); break;
- case '\u00a0': temp.Append("&nbsp;"); break;
- case '"': temp.Append("&quot;"); break;
- case '<': temp.Append("&lt;"); break;
- case '>': temp.Append("&gt;"); break;
- default: temp.Append(value[i]); break;
- }
+ case '&': temp.Append("&amp;"); break;
+ case '\u00a0': temp.Append("&nbsp;"); break;
+ case '"': temp.Append("&quot;"); break;
+ case '<': temp.Append("&lt;"); break;
+ case '>': temp.Append("&gt;"); break;
+ default: temp.Append(value[i]); break;
}
-
- return temp.Append('"').ToString();
}
- #pragma warning restore 1591
+ return temp.Append('"').ToString();
}
+
+ #pragma warning restore 1591
}
diff --git a/src/HtmlSanitizer/HtmlSanitizer.cs b/src/HtmlSanitizer/HtmlSanitizer.cs
index d612581..a802786 100644
--- a/src/HtmlSanitizer/HtmlSanitizer.cs
+++ b/src/HtmlSanitizer/HtmlSanitizer.cs
@@ -13,969 +13,968 @@
using System.Text;
using System.Text.RegularExpressions;
-namespace Ganss.Xss
+namespace Ganss.Xss;
+
+///
+/// Cleans HTML documents and fragments from constructs that can lead to XSS attacks.
+///
+///
+/// XSS attacks can occur at several levels within an HTML document or fragment:
+///
+/// HTML tags (e.g. the <script> tag)
+/// HTML attributes (e.g. the "onload" attribute)
+/// CSS styles (url property values)
+/// malformed HTML or HTML that exploits parser bugs in specific browsers
+///
+///
+/// The HtmlSanitizer class addresses all of these possible attack vectors by using a sophisticated HTML parser (AngleSharp).
+///
+///
+/// In order to facilitate different use cases, HtmlSanitizer can be customized at the levels mentioned above:
+///
+/// You can specify the allowed HTML tags through the property . All other tags will be stripped.
+/// You can specify the allowed HTML attributes through the property . All other attributes will be stripped.
+/// You can specify the allowed CSS property names through the property . All other styles will be stripped.
+/// You can specify the allowed URI schemes through the property . All other URIs will be stripped.
+/// You can specify the HTML attributes that contain URIs (such as "src", "href" etc.) through the property .
+///
+///
+///
+///
+///
+/// alert('xss')
"
+/// ]]>
+///
+///
+public class HtmlSanitizer : IHtmlSanitizer
{
+ private const string StyleAttributeName = "style";
+
+ // from http://genshi.edgewall.org/
+ private static readonly Regex CssUnicodeEscapes = new(@"\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'""{};:()#*])", RegexOptions.Compiled);
+ private static readonly Regex CssComments = new(@"/\*.*?\*/", RegexOptions.Compiled);
+ // IE6
+ private static readonly Regex CssExpression = new(@"[eE\uFF25\uFF45][xX\uFF38\uFF58][pP\uFF30\uFF50][rR\u0280\uFF32\uFF52][eE\uFF25\uFF45][sS\uFF33\uFF53]{2}[iI\u026A\uFF29\uFF49][oO\uFF2F\uFF4F][nN\u0274\uFF2E\uFF4E]", RegexOptions.Compiled);
+ private static readonly Regex CssUrl = new(@"[Uu][Rr\u0280][Ll\u029F]\((['""]?)([^'"")]+)(['""]?)", RegexOptions.Compiled);
+ private static readonly Regex WhitespaceRegex = new(@"\s*", RegexOptions.Compiled);
+ private static readonly IConfiguration defaultConfiguration = Configuration.Default.WithCss(new CssParserOptions
+ {
+ IsIncludingUnknownDeclarations = true,
+ IsIncludingUnknownRules = true,
+ IsToleratingInvalidSelectors = true,
+ });
+
+ private static readonly HtmlParser defaultHtmlParser = new(new HtmlParserOptions { IsScripting = true }, BrowsingContext.New(defaultConfiguration));
+
///
- /// Cleans HTML documents and fragments from constructs that can lead to XSS attacks.
- ///
- ///
- /// XSS attacks can occur at several levels within an HTML document or fragment:
- ///
- /// HTML tags (e.g. the <script> tag)
- /// HTML attributes (e.g. the "onload" attribute)
- /// CSS styles (url property values)
- /// malformed HTML or HTML that exploits parser bugs in specific browsers
- ///
- ///
- /// The HtmlSanitizer class addresses all of these possible attack vectors by using a sophisticated HTML parser (AngleSharp).
- ///
- ///
- /// In order to facilitate different use cases, HtmlSanitizer can be customized at the levels mentioned above:
- ///
- /// You can specify the allowed HTML tags through the property . All other tags will be stripped.
- /// You can specify the allowed HTML attributes through the property . All other attributes will be stripped.
- /// You can specify the allowed CSS property names through the property . All other styles will be stripped.
- /// You can specify the allowed URI schemes through the property . All other URIs will be stripped.
- /// You can specify the HTML attributes that contain URIs (such as "src", "href" etc.) through the property .
- ///
- ///
- ///
- ///
- ///
- /// alert('xss')
"
- /// ]]>
- ///
- ///
- public class HtmlSanitizer : IHtmlSanitizer
- {
- private const string StyleAttributeName = "style";
-
- // from http://genshi.edgewall.org/
- private static readonly Regex CssUnicodeEscapes = new(@"\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'""{};:()#*])", RegexOptions.Compiled);
- private static readonly Regex CssComments = new(@"/\*.*?\*/", RegexOptions.Compiled);
- // IE6
- private static readonly Regex CssExpression = new(@"[eE\uFF25\uFF45][xX\uFF38\uFF58][pP\uFF30\uFF50][rR\u0280\uFF32\uFF52][eE\uFF25\uFF45][sS\uFF33\uFF53]{2}[iI\u026A\uFF29\uFF49][oO\uFF2F\uFF4F][nN\u0274\uFF2E\uFF4E]", RegexOptions.Compiled);
- private static readonly Regex CssUrl = new(@"[Uu][Rr\u0280][Ll\u029F]\((['""]?)([^'"")]+)(['""]?)", RegexOptions.Compiled);
- private static readonly Regex WhitespaceRegex = new(@"\s*", RegexOptions.Compiled);
- private static readonly IConfiguration defaultConfiguration = Configuration.Default.WithCss(new CssParserOptions
- {
- IsIncludingUnknownDeclarations = true,
- IsIncludingUnknownRules = true,
- IsToleratingInvalidSelectors = true,
- });
+ /// Initializes a new instance of the class
+ /// with the default options.
+ ///
+ public HtmlSanitizer()
+ {
+ AllowedTags = new HashSet(HtmlSanitizerDefaults.AllowedTags, StringComparer.OrdinalIgnoreCase);
+ AllowedSchemes = new HashSet(HtmlSanitizerDefaults.AllowedSchemes, StringComparer.OrdinalIgnoreCase);
+ AllowedAttributes = new HashSet(HtmlSanitizerDefaults.AllowedAttributes, StringComparer.OrdinalIgnoreCase);
+ UriAttributes = new HashSet(HtmlSanitizerDefaults.UriAttributes, StringComparer.OrdinalIgnoreCase);
+ AllowedCssProperties = new HashSet(HtmlSanitizerDefaults.AllowedCssProperties, StringComparer.OrdinalIgnoreCase);
+ AllowedAtRules = new HashSet(HtmlSanitizerDefaults.AllowedAtRules);
+ AllowedClasses = new HashSet(HtmlSanitizerDefaults.AllowedClasses);
+ }
- private static readonly HtmlParser defaultHtmlParser = new(new HtmlParserOptions { IsScripting = true }, BrowsingContext.New(defaultConfiguration));
+ ///
+ /// Initializes a new instance of the class
+ /// with the given options.
+ ///
+ /// Options to control the sanitizing.
+ public HtmlSanitizer(HtmlSanitizerOptions options)
+ {
+ AllowedTags = new HashSet(options.AllowedTags, StringComparer.OrdinalIgnoreCase);
+ AllowedSchemes = new HashSet(options.AllowedSchemes, StringComparer.OrdinalIgnoreCase);
+ AllowedAttributes = new HashSet(options.AllowedAttributes, StringComparer.OrdinalIgnoreCase);
+ UriAttributes = new HashSet(options.UriAttributes, StringComparer.OrdinalIgnoreCase);
+ AllowedClasses = new HashSet(options.AllowedCssClasses, StringComparer.OrdinalIgnoreCase);
+ AllowedCssProperties = new HashSet(options.AllowedCssProperties, StringComparer.OrdinalIgnoreCase);
+ AllowedAtRules = new HashSet(options.AllowedAtRules);
+ AllowCssCustomProperties = options.AllowCssCustomProperties;
+ AllowDataAttributes = options.AllowDataAttributes;
+ }
- ///
- /// Initializes a new instance of the class
- /// with the default options.
- ///
- public HtmlSanitizer()
- {
- AllowedTags = new HashSet(HtmlSanitizerDefaults.AllowedTags, StringComparer.OrdinalIgnoreCase);
- AllowedSchemes = new HashSet(HtmlSanitizerDefaults.AllowedSchemes, StringComparer.OrdinalIgnoreCase);
- AllowedAttributes = new HashSet(HtmlSanitizerDefaults.AllowedAttributes, StringComparer.OrdinalIgnoreCase);
- UriAttributes = new HashSet(HtmlSanitizerDefaults.UriAttributes, StringComparer.OrdinalIgnoreCase);
- AllowedCssProperties = new HashSet(HtmlSanitizerDefaults.AllowedCssProperties, StringComparer.OrdinalIgnoreCase);
- AllowedAtRules = new HashSet(HtmlSanitizerDefaults.AllowedAtRules);
- AllowedClasses = new HashSet(HtmlSanitizerDefaults.AllowedClasses);
- }
+ ///
+ /// Gets or sets the default method that encodes comments.
+ ///
+ public Action EncodeComment { get; set; } = DefaultEncodeComment;
- ///
- /// Initializes a new instance of the class
- /// with the given options.
- ///
- /// Options to control the sanitizing.
- public HtmlSanitizer(HtmlSanitizerOptions options)
- {
- AllowedTags = new HashSet(options.AllowedTags, StringComparer.OrdinalIgnoreCase);
- AllowedSchemes = new HashSet(options.AllowedSchemes, StringComparer.OrdinalIgnoreCase);
- AllowedAttributes = new HashSet(options.AllowedAttributes, StringComparer.OrdinalIgnoreCase);
- UriAttributes = new HashSet(options.UriAttributes, StringComparer.OrdinalIgnoreCase);
- AllowedClasses = new HashSet(options.AllowedCssClasses, StringComparer.OrdinalIgnoreCase);
- AllowedCssProperties = new HashSet(options.AllowedCssProperties, StringComparer.OrdinalIgnoreCase);
- AllowedAtRules = new HashSet(options.AllowedAtRules);
- AllowCssCustomProperties = options.AllowCssCustomProperties;
- AllowDataAttributes = options.AllowDataAttributes;
- }
+ ///
+ /// Gets or sets the default method that encodes literal text content.
+ ///
+ public Action EncodeLiteralTextElementContent { get; set; } = DefaultEncodeLiteralTextElementContent;
- ///
- /// Gets or sets the default method that encodes comments.
- ///
- public Action EncodeComment { get; set; } = DefaultEncodeComment;
-
- ///
- /// Gets or sets the default method that encodes literal text content.
- ///
- public Action EncodeLiteralTextElementContent { get; set; } = DefaultEncodeLiteralTextElementContent;
-
- ///
- /// Gets or sets the default value indicating whether to keep child nodes of elements that are removed. Default is false.
- ///
- public static bool DefaultKeepChildNodes { get; set; } = false;
-
- ///
- /// Gets or sets a value indicating whether to keep child nodes of elements that are removed. Default is .
- ///
- public bool KeepChildNodes { get; set; } = DefaultKeepChildNodes;
-
- ///
- /// Gets or sets the default object that creates the parser used for parsing the input.
- ///
- public static Func DefaultHtmlParserFactory { get; set; } = () => defaultHtmlParser;
-
- ///
- /// Gets or sets the object the creates the parser used for parsing the input.
- ///
- public Func HtmlParserFactory { get; set; } = DefaultHtmlParserFactory;
-
- ///
- /// Gets or sets the default object used for generating output. Default is .
- ///
- public static IMarkupFormatter DefaultOutputFormatter { get; set; } = HtmlFormatter.Instance;
-
- ///
- /// Gets or sets the object used for generating output. Default is .
- ///
- public IMarkupFormatter OutputFormatter { get; set; } = DefaultOutputFormatter;
-
- ///
- /// Gets or sets the default object used for generating CSS output. Default is .
- ///
- public static IStyleFormatter DefaultStyleFormatter { get; set; } = CssStyleFormatter.Instance;
-
- ///
- /// Gets or sets the object used for generating CSS output. Default is .
- ///
- public IStyleFormatter StyleFormatter { get; set; } = DefaultStyleFormatter;
-
- ///
- /// Gets or sets the allowed CSS at-rules such as "@media" and "@font-face".
- ///
- ///
- /// The allowed CSS at-rules.
- ///
- public ISet AllowedAtRules { get; private set; }
-
- ///
- /// Gets or sets the allowed URI schemes such as "http" and "https".
- ///
- ///
- /// The allowed URI schemes.
- ///
- public ISet AllowedSchemes { get; private set; }
-
- ///
- /// Gets or sets the allowed HTML tag names such as "a" and "div".
- ///
- ///
- /// The allowed tag names.
- ///
- public ISet AllowedTags { get; private set; }
-
- ///
- /// Gets or sets the allowed HTML attributes such as "href" and "alt".
- ///
- ///
- /// The allowed HTML attributes.
- ///
- public ISet AllowedAttributes { get; private set; }
-
- ///
- /// Allow all HTML5 data attributes; the attributes prefixed with data-.
- ///
- public bool AllowDataAttributes { get; set; }
-
- ///
- /// Gets or sets the HTML attributes that can contain a URI such as "href".
- ///
- ///
- /// The URI attributes.
- ///
- public ISet UriAttributes { get; private set; }
-
- ///
- /// Gets or sets the allowed CSS properties such as "font" and "margin".
- ///
- ///
- /// The allowed CSS properties.
- ///
- public ISet AllowedCssProperties { get; private set; }
-
- ///
- /// Allow all custom CSS properties (variables) prefixed with --.
- ///
- public bool AllowCssCustomProperties { get; set; }
-
- ///
- /// Gets or sets a regex that must not match for legal CSS property values.
- ///
- ///
- /// The regex.
- ///
- public Regex DisallowCssPropertyValue { get; set; } = DefaultDisallowedCssPropertyValue;
-
- ///
- /// Gets or sets the allowed CSS classes. If the set is empty, all classes will be allowed.
- ///
- ///
- /// The allowed CSS classes. An empty set means all classes are allowed.
- ///
- public ISet AllowedClasses { get; private set; }
-
- ///
- /// Occurs after sanitizing the document and post processing nodes.
- ///
- public event EventHandler? PostProcessDom;
- ///
- /// Occurs for every node after sanitizing.
- ///
- public event EventHandler? PostProcessNode;
- ///
- /// Occurs before a tag is removed.
- ///
- public event EventHandler? RemovingTag;
- ///
- /// Occurs before an attribute is removed.
- ///
- public event EventHandler? RemovingAttribute;
- ///
- /// Occurs before a style is removed.
- ///
- public event EventHandler? RemovingStyle;
- ///
- /// Occurs before an at-rule is removed.
- ///
- public event EventHandler? RemovingAtRule;
- ///
- /// Occurs before a comment is removed.
- ///
- public event EventHandler? RemovingComment;
- ///
- /// Occurs before a CSS class is removed.
- ///
- public event EventHandler? RemovingCssClass;
- ///
- /// Occurs when a URL is being sanitized.
- ///
- public event EventHandler? FilterUrl;
-
- ///
- /// Raises the event.
- ///
- /// The instance containing the event data.
- protected virtual void OnPostProcessDom(PostProcessDomEventArgs e)
- {
- PostProcessDom?.Invoke(this, e);
- }
+ ///
+ /// Gets or sets the default value indicating whether to keep child nodes of elements that are removed. Default is false.
+ ///
+ public static bool DefaultKeepChildNodes { get; set; } = false;
- ///
- /// Raises the event.
- ///
- /// The instance containing the event data.
- protected virtual void OnPostProcessNode(PostProcessNodeEventArgs e)
- {
- PostProcessNode?.Invoke(this, e);
- }
+ ///
+ /// Gets or sets a value indicating whether to keep child nodes of elements that are removed. Default is .
+ ///
+ public bool KeepChildNodes { get; set; } = DefaultKeepChildNodes;
- ///
- /// Raises the event.
- ///
- /// The instance containing the event data.
- protected virtual void OnRemovingTag(RemovingTagEventArgs e)
- {
- RemovingTag?.Invoke(this, e);
- }
+ ///
+ /// Gets or sets the default object that creates the parser used for parsing the input.
+ ///
+ public static Func DefaultHtmlParserFactory { get; set; } = () => defaultHtmlParser;
- ///
- /// Raises the event.
- ///
- /// The instance containing the event data.
- protected virtual void OnRemovingAttribute(RemovingAttributeEventArgs e)
- {
- RemovingAttribute?.Invoke(this, e);
- }
+ ///
+ /// Gets or sets the object that creates the parser used for parsing the input.
+ ///
+ public Func HtmlParserFactory { get; set; } = DefaultHtmlParserFactory;
- ///
- /// Raises the event.
- ///
- /// The instance containing the event data.
- protected virtual void OnRemovingStyle(RemovingStyleEventArgs e)
- {
- RemovingStyle?.Invoke(this, e);
- }
+ ///
+ /// Gets or sets the default object used for generating output. Default is .
+ ///
+ public static IMarkupFormatter DefaultOutputFormatter { get; set; } = HtmlFormatter.Instance;
- ///
- /// Raises the event.
- ///
- /// The instance containing the event data.
- protected virtual void OnRemovingAtRule(RemovingAtRuleEventArgs e)
- {
- RemovingAtRule?.Invoke(this, e);
- }
+ ///
+ /// Gets or sets the object used for generating output. Default is .
+ ///
+ public IMarkupFormatter OutputFormatter { get; set; } = DefaultOutputFormatter;
- ///
- /// Raises the event.
- ///
- /// The instance containing the event data.
- protected virtual void OnRemovingComment(RemovingCommentEventArgs e)
- {
- RemovingComment?.Invoke(this, e);
- }
+ ///
+ /// Gets or sets the default object used for generating CSS output. Default is .
+ ///
+ public static IStyleFormatter DefaultStyleFormatter { get; set; } = CssStyleFormatter.Instance;
+
+ ///
+ /// Gets or sets the object used for generating CSS output. Default is .
+ ///
+ public IStyleFormatter StyleFormatter { get; set; } = DefaultStyleFormatter;
- ///
- /// The default regex for disallowed CSS property values.
- ///
- public static readonly Regex DefaultDisallowedCssPropertyValue = new(@"[<>]", RegexOptions.Compiled);
+ ///
+ /// Gets or sets the allowed CSS at-rules such as "@media" and "@font-face".
+ ///
+ ///
+ /// The allowed CSS at-rules.
+ ///
+ public ISet AllowedAtRules { get; private set; }
- ///
- /// Raises the event.
- ///
- /// The instance containing the event data.
- protected virtual void OnRemovingCssClass(RemovingCssClassEventArgs e)
- {
- RemovingCssClass?.Invoke(this, e);
- }
+ ///
+ /// Gets or sets the allowed URI schemes such as "http" and "https".
+ ///
+ ///
+ /// The allowed URI schemes.
+ ///
+ public ISet AllowedSchemes { get; private set; }
+
+ ///
+ /// Gets or sets the allowed HTML tag names such as "a" and "div".
+ ///
+ ///
+ /// The allowed tag names.
+ ///
+ public ISet AllowedTags { get; private set; }
+
+ ///
+ /// Gets or sets the allowed HTML attributes such as "href" and "alt".
+ ///
+ ///
+ /// The allowed HTML attributes.
+ ///
+ public ISet AllowedAttributes { get; private set; }
+
+ ///
+ /// Allow all HTML5 data attributes; the attributes prefixed with data-.
+ ///
+ public bool AllowDataAttributes { get; set; }
+
+ ///
+ /// Gets or sets the HTML attributes that can contain a URI such as "href".
+ ///
+ ///
+ /// The URI attributes.
+ ///
+ public ISet UriAttributes { get; private set; }
+
+ ///
+ /// Gets or sets the allowed CSS properties such as "font" and "margin".
+ ///
+ ///
+ /// The allowed CSS properties.
+ ///
+ public ISet AllowedCssProperties { get; private set; }
+
+ ///
+ /// Allow all custom CSS properties (variables) prefixed with --.
+ ///
+ public bool AllowCssCustomProperties { get; set; }
+
+ ///
+ /// Gets or sets a regex that must not match for legal CSS property values.
+ ///
+ ///
+ /// The regex.
+ ///
+ public Regex DisallowCssPropertyValue { get; set; } = DefaultDisallowedCssPropertyValue;
+
+ ///
+ /// Gets or sets the allowed CSS classes. If the set is empty, all classes will be allowed.
+ ///
+ ///
+ /// The allowed CSS classes. An empty set means all classes are allowed.
+ ///
+ public ISet AllowedClasses { get; private set; }
+
+ ///
+ /// Occurs after sanitizing the document and post processing nodes.
+ ///
+ public event EventHandler? PostProcessDom;
+ ///
+ /// Occurs for every node after sanitizing.
+ ///
+ public event EventHandler? PostProcessNode;
+ ///
+ /// Occurs before a tag is removed.
+ ///
+ public event EventHandler? RemovingTag;
+ ///
+ /// Occurs before an attribute is removed.
+ ///
+ public event EventHandler? RemovingAttribute;
+ ///
+ /// Occurs before a style is removed.
+ ///
+ public event EventHandler? RemovingStyle;
+ ///
+ /// Occurs before an at-rule is removed.
+ ///
+ public event EventHandler? RemovingAtRule;
+ ///
+ /// Occurs before a comment is removed.
+ ///
+ public event EventHandler? RemovingComment;
+ ///
+ /// Occurs before a CSS class is removed.
+ ///
+ public event EventHandler? RemovingCssClass;
+ ///
+ /// Occurs when a URL is being sanitized.
+ ///
+ public event EventHandler? FilterUrl;
+
+ ///
+ /// Raises the PostProcessDom event.
+ ///
+ /// The instance containing the event data.
+ protected virtual void OnPostProcessDom(PostProcessDomEventArgs e)
+ {
+ PostProcessDom?.Invoke(this, e);
+ }
+
+ ///
+ /// Raises the PostProcessNode event.
+ ///
+ /// The instance containing the event data.
+ protected virtual void OnPostProcessNode(PostProcessNodeEventArgs e)
+ {
+ PostProcessNode?.Invoke(this, e);
+ }
+
+ ///
+ /// Raises the RemovingTag event.
+ ///
+ /// The instance containing the event data.
+ protected virtual void OnRemovingTag(RemovingTagEventArgs e)
+ {
+ RemovingTag?.Invoke(this, e);
+ }
+
+ ///
+ /// Raises the RemovingAttribute event.
+ ///
+ /// The instance containing the event data.
+ protected virtual void OnRemovingAttribute(RemovingAttributeEventArgs e)
+ {
+ RemovingAttribute?.Invoke(this, e);
+ }
+
+ ///
+ /// Raises the RemovingStyle event.
+ ///
+ /// The instance containing the event data.
+ protected virtual void OnRemovingStyle(RemovingStyleEventArgs e)
+ {
+ RemovingStyle?.Invoke(this, e);
+ }
+
+ ///
+ /// Raises the RemovingAtRule event.
+ ///
+ /// The instance containing the event data.
+ protected virtual void OnRemovingAtRule(RemovingAtRuleEventArgs e)
+ {
+ RemovingAtRule?.Invoke(this, e);
+ }
+
+ ///
+ /// Raises the RemovingComment event.
+ ///
+ /// The instance containing the event data.
+ protected virtual void OnRemovingComment(RemovingCommentEventArgs e)
+ {
+ RemovingComment?.Invoke(this, e);
+ }
+
+ ///
+ /// The default regex for disallowed CSS property values.
+ ///
+ public static readonly Regex DefaultDisallowedCssPropertyValue = new(@"[<>]", RegexOptions.Compiled);
+
+ ///
+ /// Raises the RemovingCssClass event.
+ ///
+ /// The instance containing the event data.
+ protected virtual void OnRemovingCssClass(RemovingCssClassEventArgs e)
+ {
+ RemovingCssClass?.Invoke(this, e);
+ }
+
+ ///
+ /// Raises the FilterUrl event.
+ ///
+ /// The instance containing the event data.
+ protected virtual void OnFilteringUrl(FilterUrlEventArgs e)
+ {
+ FilterUrl?.Invoke(this, e);
+ }
- ///
- /// Raises the event.
- ///
- /// The instance containing the event data.
- protected virtual void OnFilteringUrl(FilterUrlEventArgs e)
+ ///
+ /// Return all nested subnodes of a node. The nodes are returned in DOM order.
+ ///
+ /// The root node.
+ /// All nested subnodes.
+ private static IEnumerable GetAllNodes(INode dom)
+ {
+ if (dom.ChildNodes.Length == 0) yield break;
+
+ var s = new Stack();
+ for (var i = dom.ChildNodes.Length - 1; i >= 0; i--)
{
- FilterUrl?.Invoke(this, e);
+ s.Push(dom.ChildNodes[i]);
}
- ///
- /// Return all nested subnodes of a node. The nodes are returned in DOM order.
- ///
- /// The root node.
- /// All nested subnodes.
- private static IEnumerable GetAllNodes(INode dom)
+ while (s.Count > 0)
{
- if (dom.ChildNodes.Length == 0) yield break;
+ var n = s.Pop();
+ yield return n;
- var s = new Stack();
- for (var i = dom.ChildNodes.Length - 1; i >= 0; i--)
+ for (var i = n.ChildNodes.Length - 1; i >= 0; i--)
{
- s.Push(dom.ChildNodes[i]);
- }
-
- while (s.Count > 0)
- {
- var n = s.Pop();
- yield return n;
-
- for (var i = n.ChildNodes.Length - 1; i >= 0; i--)
- {
- s.Push(n.ChildNodes[i]);
- }
+ s.Push(n.ChildNodes[i]);
}
}
+ }
- ///
- /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
- ///
- /// The HTML body fragment to sanitize.
- /// The base URL relative URLs are resolved against. No resolution if empty.
- /// The formatter used to render the DOM. Using the if null.
- /// The sanitized HTML body fragment.
- public string Sanitize(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null)
- {
- using var dom = SanitizeDom(html, baseUrl);
- if (dom.Body == null) return string.Empty;
- var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter);
+ ///
+ /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
+ ///
+ /// The HTML body fragment to sanitize.
+ /// The base URL relative URLs are resolved against. No resolution if empty.
+ /// The formatter used to render the DOM. Using the OutputFormatter if null.
+ /// The sanitized HTML body fragment.
+ public string Sanitize(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null)
+ {
+ using var dom = SanitizeDom(html, baseUrl);
+ if (dom.Body == null) return string.Empty;
+ var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter);
- return output;
- }
+ return output;
+ }
- ///
- /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
- ///
- /// The HTML body fragment to sanitize.
- /// The base URL relative URLs are resolved against. No resolution if empty.
- /// The sanitized HTML document.
- public IHtmlDocument SanitizeDom(string html, string baseUrl = "")
- {
- var parser = HtmlParserFactory();
- var dom = parser.ParseDocument("" + html);
+ ///
+ /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
+ ///
+ /// The HTML body fragment to sanitize.
+ /// The base URL relative URLs are resolved against. No resolution if empty.
+ /// The sanitized HTML document.
+ public IHtmlDocument SanitizeDom(string html, string baseUrl = "")
+ {
+ var parser = HtmlParserFactory();
+ var dom = parser.ParseDocument("" + html);
- if (dom.Body != null)
- DoSanitize(dom, dom.Body, baseUrl);
+ if (dom.Body != null)
+ DoSanitize(dom, dom.Body, baseUrl);
- return dom;
- }
+ return dom;
+ }
- ///
- /// Sanitizes the specified parsed HTML body fragment.
- /// If the document has not been parsed with CSS support then all styles will be removed.
- ///
- /// The parsed HTML document.
- /// The node within which to sanitize.
- /// The base URL relative URLs are resolved against. No resolution if empty.
- /// The sanitized HTML document.
- public IHtmlDocument SanitizeDom(IHtmlDocument document, IHtmlElement? context = null, string baseUrl = "")
- {
- DoSanitize(document, context ?? (IParentNode)document, baseUrl);
+ ///
+ /// Sanitizes the specified parsed HTML body fragment.
+ /// If the document has not been parsed with CSS support then all styles will be removed.
+ ///
+ /// The parsed HTML document.
+ /// The node within which to sanitize.
+ /// The base URL relative URLs are resolved against. No resolution if empty.
+ /// The sanitized HTML document.
+ public IHtmlDocument SanitizeDom(IHtmlDocument document, IHtmlElement? context = null, string baseUrl = "")
+ {
+ DoSanitize(document, context ?? (IParentNode)document, baseUrl);
- return document;
- }
+ return document;
+ }
- ///
- /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
- ///
- /// The HTML document to sanitize.
- /// The base URL relative URLs are resolved against. No resolution if empty.
- /// The formatter used to render the DOM. Using the if null.
- /// The sanitized HTML document.
- public string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null)
- {
- var parser = HtmlParserFactory();
- using var dom = parser.ParseDocument(html);
+ ///
+ /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
+ ///
+ /// The HTML document to sanitize.
+ /// The base URL relative URLs are resolved against. No resolution if empty.
+ /// The formatter used to render the DOM. Using the OutputFormatter if null.
+ /// The sanitized HTML document.
+ public string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null)
+ {
+ var parser = HtmlParserFactory();
+ using var dom = parser.ParseDocument(html);
- DoSanitize(dom, dom, baseUrl);
+ DoSanitize(dom, dom, baseUrl);
- var output = dom.ToHtml(outputFormatter ?? OutputFormatter);
+ var output = dom.ToHtml(outputFormatter ?? OutputFormatter);
- return output;
- }
+ return output;
+ }
- ///
- /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
- ///
- /// The HTML document to sanitize.
- /// The base URL relative URLs are resolved against. No resolution if empty.
- /// The formatter used to render the DOM. Using the if null.
- /// The sanitized HTML document.
- public string SanitizeDocument(Stream html, string baseUrl = "", IMarkupFormatter? outputFormatter = null)
- {
- var parser = HtmlParserFactory();
- using var dom = parser.ParseDocument(html);
+ ///
+ /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
+ ///
+ /// The HTML document to sanitize.
+ /// The base URL relative URLs are resolved against. No resolution if empty.
+ /// The formatter used to render the DOM. Using the if null.
+ /// The sanitized HTML document.
+ public string SanitizeDocument(Stream html, string baseUrl = "", IMarkupFormatter? outputFormatter = null)
+ {
+ var parser = HtmlParserFactory();
+ using var dom = parser.ParseDocument(html);
- DoSanitize(dom, dom, baseUrl);
+ DoSanitize(dom, dom, baseUrl);
- var output = dom.ToHtml(outputFormatter ?? OutputFormatter);
+ var output = dom.ToHtml(outputFormatter ?? OutputFormatter);
- return output;
- }
+ return output;
+ }
- ///
- /// Removes all comment nodes from a list of nodes.
- ///
- /// The node within which to remove comments.
- /// true if any comments were removed; otherwise, false.
- private void RemoveComments(INode context)
+ ///
+ /// Removes all comment nodes from a list of nodes.
+ ///
+ /// The node within which to remove comments.
+ /// A comment is kept when a RemovingComment handler sets Cancel on the event args.
+ private void RemoveComments(INode context)
+ {
+ foreach (var comment in GetAllNodes(context).OfType().ToList())
{
- foreach (var comment in GetAllNodes(context).OfType().ToList())
- {
- EncodeComment(comment);
+ EncodeComment(comment);
- var e = new RemovingCommentEventArgs(comment);
- OnRemovingComment(e);
+ var e = new RemovingCommentEventArgs(comment);
+ OnRemovingComment(e);
- if (!e.Cancel)
- comment.Remove();
- }
+ if (!e.Cancel)
+ comment.Remove();
}
+ }
- private static void DefaultEncodeComment(IComment comment)
+ private static void DefaultEncodeComment(IComment comment)
+ {
+ var escapedText = comment.TextContent.Replace("<", "<").Replace(">", ">");
+ if (escapedText != comment.TextContent)
+ comment.TextContent = escapedText;
+ }
+
+ private static void DefaultEncodeLiteralTextElementContent(IElement tag)
+ {
+ var escapedHtml = tag.InnerHtml.Replace("<", "<").Replace(">", ">");
+ if (escapedHtml != tag.InnerHtml)
+ tag.InnerHtml = escapedHtml;
+ if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript
+ tag.SetInnerText(escapedHtml);
+ }
+
+ private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl = "")
+ {
+ // remove disallowed tags
+ foreach (var tag in context.QuerySelectorAll("*").Where(t => !IsAllowedTag(t)).ToList())
{
- var escapedText = comment.TextContent.Replace("<", "<").Replace(">", ">");
- if (escapedText != comment.TextContent)
- comment.TextContent = escapedText;
+ RemoveTag(tag, RemoveReason.NotAllowedTag);
}
- private static void DefaultEncodeLiteralTextElementContent(IElement tag)
+ // always encode text in raw data content
+ foreach (var tag in context.QuerySelectorAll("*")
+ .Where(t => t is not IHtmlStyleElement
+ && t.Flags.HasFlag(NodeFlags.LiteralText)
+ && !string.IsNullOrWhiteSpace(t.InnerHtml)))
{
- var escapedHtml = tag.InnerHtml.Replace("<", "<").Replace(">", ">");
- if (escapedHtml != tag.InnerHtml)
- tag.InnerHtml = escapedHtml;
- if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript
- tag.SetInnerText(escapedHtml);
+ EncodeLiteralTextElementContent(tag);
}
- private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl = "")
+ SanitizeStyleSheets(dom, baseUrl);
+
+ // cleanup attributes
+ foreach (var tag in context.QuerySelectorAll("*").ToList())
{
- // remove disallowed tags
- foreach (var tag in context.QuerySelectorAll("*").Where(t => !IsAllowedTag(t)).ToList())
+ // remove disallowed attributes
+ foreach (var attribute in tag.Attributes.Where(a => !IsAllowedAttribute(a)).ToList())
{
- RemoveTag(tag, RemoveReason.NotAllowedTag);
+ RemoveAttribute(tag, attribute, RemoveReason.NotAllowedAttribute);
}
- // always encode text in raw data content
- foreach (var tag in context.QuerySelectorAll("*")
- .Where(t => t is not IHtmlStyleElement
- && t.Flags.HasFlag(NodeFlags.LiteralText)
- && !string.IsNullOrWhiteSpace(t.InnerHtml)))
+ // sanitize URLs in URL-marked attributes
+ foreach (var attribute in tag.Attributes.Where(IsUriAttribute).ToList())
{
- EncodeLiteralTextElementContent(tag);
+ var url = SanitizeUrl(tag, attribute.Value, baseUrl);
+
+ if (url == null)
+ RemoveAttribute(tag, attribute, RemoveReason.NotAllowedUrlValue);
+ else
+ tag.SetAttribute(attribute.Name, url);
}
- SanitizeStyleSheets(dom, baseUrl);
+ // sanitize the style attribute
+ var oldStyleEmpty = string.IsNullOrEmpty(tag.GetAttribute(StyleAttributeName));
+ SanitizeStyle(tag, baseUrl);
- // cleanup attributes
- foreach (var tag in context.QuerySelectorAll("*").ToList())
+ // sanitize the value of the attributes
+ foreach (var attribute in tag.Attributes.ToList())
{
- // remove disallowed attributes
- foreach (var attribute in tag.Attributes.Where(a => !IsAllowedAttribute(a)).ToList())
+ // The '& Javascript include' is a possible method to execute Javascript and can lead to XSS.
+ // (see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#.26_JavaScript_includes)
+ if (attribute.Value.Contains("&{"))
{
- RemoveAttribute(tag, attribute, RemoveReason.NotAllowedAttribute);
+ RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue);
}
-
- // sanitize URLs in URL-marked attributes
- foreach (var attribute in tag.Attributes.Where(IsUriAttribute).ToList())
+ else
{
- var url = SanitizeUrl(tag, attribute.Value, baseUrl);
-
- if (url == null)
- RemoveAttribute(tag, attribute, RemoveReason.NotAllowedUrlValue);
- else
- tag.SetAttribute(attribute.Name, url);
- }
+ if (AllowedClasses.Any() && attribute.Name == "class")
+ {
+ var removedClasses = tag.ClassList.Except(AllowedClasses).ToArray();
- // sanitize the style attribute
- var oldStyleEmpty = string.IsNullOrEmpty(tag.GetAttribute(StyleAttributeName));
- SanitizeStyle(tag, baseUrl);
+ foreach (var removedClass in removedClasses)
+ RemoveCssClass(tag, removedClass, RemoveReason.NotAllowedCssClass);
- // sanitize the value of the attributes
- foreach (var attribute in tag.Attributes.ToList())
- {
- // The '& Javascript include' is a possible method to execute Javascript and can lead to XSS.
- // (see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#.26_JavaScript_includes)
- if (attribute.Value.Contains("&{"))
- {
- RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue);
+ if (!tag.ClassList.Any())
+ RemoveAttribute(tag, attribute, RemoveReason.ClassAttributeEmpty);
}
- else
+ else if (!oldStyleEmpty && attribute.Name == StyleAttributeName && string.IsNullOrEmpty(attribute.Value))
{
- if (AllowedClasses.Any() && attribute.Name == "class")
- {
- var removedClasses = tag.ClassList.Except(AllowedClasses).ToArray();
-
- foreach (var removedClass in removedClasses)
- RemoveCssClass(tag, removedClass, RemoveReason.NotAllowedCssClass);
-
- if (!tag.ClassList.Any())
- RemoveAttribute(tag, attribute, RemoveReason.ClassAttributeEmpty);
- }
- else if (!oldStyleEmpty && attribute.Name == StyleAttributeName && string.IsNullOrEmpty(attribute.Value))
- {
- RemoveAttribute(tag, attribute, RemoveReason.StyleAttributeEmpty);
- }
+ RemoveAttribute(tag, attribute, RemoveReason.StyleAttributeEmpty);
}
}
}
+ }
- if (context is INode node)
+ if (context is INode node)
+ {
+ RemoveComments(node);
+ }
+
+ DoPostProcess(dom, context as INode);
+ }
+
+ private void SanitizeStyleSheets(IHtmlDocument dom, string baseUrl)
+ {
+ foreach (var styleSheet in dom.StyleSheets.OfType())
+ {
+ var styleTag = styleSheet.OwnerNode;
+ var i = 0;
+
+ while (i < styleSheet.Rules.Length)
{
- RemoveComments(node);
+ var rule = styleSheet.Rules[i];
+ if (!SanitizeStyleRule(rule, styleTag, baseUrl) && RemoveAtRule(styleTag, rule))
+ styleSheet.RemoveAt(i);
+ else i++;
}
- DoPostProcess(dom, context as INode);
+ styleTag.InnerHtml = styleSheet.ToCss(StyleFormatter).Replace("<", "\\3c ");
}
+ }
+
+ private bool SanitizeStyleRule(ICssRule rule, IElement styleTag, string baseUrl)
+ {
+ if (!AllowedAtRules.Contains(rule.Type)) return false;
- private void SanitizeStyleSheets(IHtmlDocument dom, string baseUrl)
+ if (rule is ICssStyleRule styleRule)
+ {
+ SanitizeStyleDeclaration(styleTag, styleRule.Style, baseUrl);
+ }
+ else
{
- foreach (var styleSheet in dom.StyleSheets.OfType())
+ if (rule is ICssGroupingRule groupingRule)
{
- var styleTag = styleSheet.OwnerNode;
var i = 0;
- while (i < styleSheet.Rules.Length)
+ while (i < groupingRule.Rules.Length)
{
- var rule = styleSheet.Rules[i];
- if (!SanitizeStyleRule(rule, styleTag, baseUrl) && RemoveAtRule(styleTag, rule))
- styleSheet.RemoveAt(i);
+ var childRule = groupingRule.Rules[i];
+ if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule))
+ groupingRule.RemoveAt(i);
else i++;
}
-
- styleTag.InnerHtml = styleSheet.ToCss(StyleFormatter).Replace("<", "\\3c ");
}
- }
-
- private bool SanitizeStyleRule(ICssRule rule, IElement styleTag, string baseUrl)
- {
- if (!AllowedAtRules.Contains(rule.Type)) return false;
-
- if (rule is ICssStyleRule styleRule)
+ else if (rule is ICssPageRule pageRule)
{
- SanitizeStyleDeclaration(styleTag, styleRule.Style, baseUrl);
+ SanitizeStyleDeclaration(styleTag, pageRule.Style, baseUrl);
}
- else
+ else if (rule is ICssKeyframesRule keyFramesRule)
{
- if (rule is ICssGroupingRule groupingRule)
- {
- var i = 0;
-
- while (i < groupingRule.Rules.Length)
- {
- var childRule = groupingRule.Rules[i];
- if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule))
- groupingRule.RemoveAt(i);
- else i++;
- }
- }
- else if (rule is ICssPageRule pageRule)
- {
- SanitizeStyleDeclaration(styleTag, pageRule.Style, baseUrl);
- }
- else if (rule is ICssKeyframesRule keyFramesRule)
+ foreach (var childRule in keyFramesRule.Rules.OfType().ToList())
{
- foreach (var childRule in keyFramesRule.Rules.OfType().ToList())
- {
- if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule))
- keyFramesRule.Remove(childRule.KeyText);
- }
- }
- else if (rule is ICssKeyframeRule keyFrameRule)
- {
- SanitizeStyleDeclaration(styleTag, keyFrameRule.Style, baseUrl);
+ if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule))
+ keyFramesRule.Remove(childRule.KeyText);
}
}
-
- return true;
+ else if (rule is ICssKeyframeRule keyFrameRule)
+ {
+ SanitizeStyleDeclaration(styleTag, keyFrameRule.Style, baseUrl);
+ }
}
- ///
- /// Performs post processing on all nodes in the document.
- ///
- /// The HTML document.
- /// The node within which to post process all nodes.
- private void DoPostProcess(IHtmlDocument dom, INode? context)
+ return true;
+ }
+
+ ///
+ /// Performs post processing on all nodes in the document.
+ ///
+ /// The HTML document.
+ /// The node within which to post process all nodes.
+ private void DoPostProcess(IHtmlDocument dom, INode? context)
+ {
+ if (PostProcessNode != null)
{
- if (PostProcessNode != null)
- {
- dom.Normalize();
+ dom.Normalize();
- if (context != null)
+ if (context != null)
+ {
+ var nodes = GetAllNodes(context).ToList();
+ foreach (var node in nodes)
{
- var nodes = GetAllNodes(context).ToList();
- foreach (var node in nodes)
+ var e = new PostProcessNodeEventArgs(dom, node);
+ OnPostProcessNode(e);
+ if (e.ReplacementNodes.Any())
{
- var e = new PostProcessNodeEventArgs(dom, node);
- OnPostProcessNode(e);
- if (e.ReplacementNodes.Any())
- {
- ((IChildNode)node).Replace([.. e.ReplacementNodes]);
- }
+ ((IChildNode)node).Replace([.. e.ReplacementNodes]);
}
}
}
-
- if (PostProcessDom != null)
- {
- var e = new PostProcessDomEventArgs(dom);
- OnPostProcessDom(e);
- }
}
- ///
- /// Determines whether the specified attribute can contain a URI.
- ///
- /// The attribute.
- /// true if the attribute can contain a URI; otherwise, false.
- private bool IsUriAttribute(IAttr attribute)
+ if (PostProcessDom != null)
{
- return UriAttributes.Contains(attribute.Name);
+ var e = new PostProcessDomEventArgs(dom);
+ OnPostProcessDom(e);
}
+ }
- ///
- /// Determines whether the specified tag is allowed.
- ///
- /// The tag.
- /// true if the tag is allowed; otherwise, false.
- private bool IsAllowedTag(IElement tag)
- {
- return AllowedTags.Contains(tag.NodeName);
- }
+ ///
+ /// Determines whether the specified attribute can contain a URI.
+ ///
+ /// The attribute.
+ /// true if the attribute can contain a URI; otherwise, false.
+ private bool IsUriAttribute(IAttr attribute)
+ {
+ return UriAttributes.Contains(attribute.Name);
+ }
- ///
- /// Determines whether the specified attribute is allowed.
- ///
- /// The attribute.
- /// true if the attribute is allowed; otherwise, false.
- private bool IsAllowedAttribute(IAttr attribute)
- {
- return AllowedAttributes.Contains(attribute.Name)
- // test html5 data- attributes
- || (AllowDataAttributes && attribute.Name != null && attribute.Name.StartsWith("data-", StringComparison.OrdinalIgnoreCase));
- }
+ ///
+ /// Determines whether the specified tag is allowed.
+ ///
+ /// The tag.
+ /// true if the tag is allowed; otherwise, false.
+ private bool IsAllowedTag(IElement tag)
+ {
+ return AllowedTags.Contains(tag.NodeName);
+ }
+
+ ///
+ /// Determines whether the specified attribute is allowed.
+ ///
+ /// The attribute.
+ /// true if the attribute is allowed; otherwise, false.
+ private bool IsAllowedAttribute(IAttr attribute)
+ {
+ return AllowedAttributes.Contains(attribute.Name)
+ // test html5 data- attributes
+ || (AllowDataAttributes && attribute.Name != null && attribute.Name.StartsWith("data-", StringComparison.OrdinalIgnoreCase));
+ }
+
+ ///
+ /// Sanitizes the style.
+ ///
+ /// The element.
+ /// The base URL.
+ protected void SanitizeStyle(IElement element, string baseUrl)
+ {
+ // filter out invalid CSS declarations
+ // see https://github.com/AngleSharp/AngleSharp/issues/101
+ var attribute = element.GetAttribute(StyleAttributeName);
+ if (attribute == null)
+ return;
- ///
- /// Sanitizes the style.
- ///
- /// The element.
- /// The base URL.
- protected void SanitizeStyle(IElement element, string baseUrl)
+ if (element.GetStyle() == null)
{
- // filter out invalid CSS declarations
- // see https://github.com/AngleSharp/AngleSharp/issues/101
- var attribute = element.GetAttribute(StyleAttributeName);
- if (attribute == null)
- return;
+ element.RemoveAttribute(StyleAttributeName);
+ return;
+ }
- if (element.GetStyle() == null)
- {
- element.RemoveAttribute(StyleAttributeName);
- return;
- }
+ element.SetAttribute(StyleAttributeName, element.GetStyle().ToCss(StyleFormatter));
- element.SetAttribute(StyleAttributeName, element.GetStyle().ToCss(StyleFormatter));
+ var styles = element.GetStyle();
+ if (styles == null || styles.Length == 0)
+ return;
- var styles = element.GetStyle();
- if (styles == null || styles.Length == 0)
- return;
+ SanitizeStyleDeclaration(element, styles, baseUrl);
+ }
- SanitizeStyleDeclaration(element, styles, baseUrl);
- }
+ ///
+ /// Verify if the given CSS property name is allowed. By default this will
+ /// check if the property is in the AllowedCssProperties set,
+ /// or if the property is a custom property and AllowCssCustomProperties is true.
+ ///
+ /// The name of the CSS property.
+ /// True if the property is allowed; otherwise, false.
+ protected virtual bool IsAllowedCssProperty(string propertyName)
+ {
+ return AllowedCssProperties.Contains(propertyName)
+ || AllowCssCustomProperties && propertyName != null && propertyName.StartsWith("--");
+ }
- ///
- /// Verify if the given CSS property name is allowed. By default this will
- /// check if the property is in the set,
- /// or if the property is a custom property and is true.
- ///
- /// The name of the CSS property.
- /// True if the property is allowed or not.
- protected virtual bool IsAllowedCssProperty(string propertyName)
- {
- return AllowedCssProperties.Contains(propertyName)
- || AllowCssCustomProperties && propertyName != null && propertyName.StartsWith("--");
- }
+ private void SanitizeStyleDeclaration(IElement element, ICssStyleDeclaration styles, string baseUrl)
+ {
+ var removeStyles = new List>();
+ var setStyles = new Dictionary();
- private void SanitizeStyleDeclaration(IElement element, ICssStyleDeclaration styles, string baseUrl)
+ foreach (var style in styles)
{
- var removeStyles = new List>();
- var setStyles = new Dictionary();
+ var key = DecodeCss(style.Name);
+ var val = DecodeCss(style.Value);
- foreach (var style in styles)
+ if (!IsAllowedCssProperty(key))
{
- var key = DecodeCss(style.Name);
- var val = DecodeCss(style.Value);
-
- if (!IsAllowedCssProperty(key))
- {
- removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedStyle));
- continue;
- }
+ removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedStyle));
+ continue;
+ }
- if (CssExpression.IsMatch(val) || DisallowCssPropertyValue.IsMatch(val))
- {
- removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedValue));
- continue;
- }
+ if (CssExpression.IsMatch(val) || DisallowCssPropertyValue.IsMatch(val))
+ {
+ removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedValue));
+ continue;
+ }
- val = WhitespaceRegex.Replace(val, string.Empty);
+ val = WhitespaceRegex.Replace(val, string.Empty);
- var urls = CssUrl.Matches(val).Cast().Select(m => (Match: m, Url: SanitizeUrl(element, m.Groups[2].Value, baseUrl)));
+ var urls = CssUrl.Matches(val).Cast().Select(m => (Match: m, Url: SanitizeUrl(element, m.Groups[2].Value, baseUrl)));
- if (urls.Any())
+ if (urls.Any())
+ {
+ if (urls.Any(u => u.Url == null))
+ removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedUrlValue));
+ else
{
- if (urls.Any(u => u.Url == null))
- removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedUrlValue));
- else
- {
- var sb = new StringBuilder();
- var ix = 0;
+ var sb = new StringBuilder();
+ var ix = 0;
- foreach (var url in urls)
- {
- sb.Append(val, ix, url.Match.Index - ix);
- sb.Append("url(");
- sb.Append(url.Match.Groups[1].Value);
- sb.Append(url.Url);
- sb.Append(url.Match.Groups[3].Value);
- ix = url.Match.Index + url.Match.Length;
- }
+ foreach (var url in urls)
+ {
+ sb.Append(val, ix, url.Match.Index - ix);
+ sb.Append("url(");
+ sb.Append(url.Match.Groups[1].Value);
+ sb.Append(url.Url);
+ sb.Append(url.Match.Groups[3].Value);
+ ix = url.Match.Index + url.Match.Length;
+ }
- sb.Append(val, ix, val.Length - ix);
+ sb.Append(val, ix, val.Length - ix);
- var s = sb.ToString();
+ var s = sb.ToString();
- if (s != val)
+ if (s != val)
+ {
+ if (key != style.Name)
{
- if (key != style.Name)
- {
- removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedUrlValue));
- }
- setStyles[key] = s;
+ removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedUrlValue));
}
+ setStyles[key] = s;
}
}
}
+ }
- foreach (var style in setStyles)
- {
- styles.SetProperty(style.Key, style.Value);
- }
-
- foreach (var style in removeStyles)
- {
- RemoveStyle(element, styles, style.Item1, style.Item2);
- }
+ foreach (var style in setStyles)
+ {
+ styles.SetProperty(style.Key, style.Value);
}
- ///
- /// Decodes CSS Unicode escapes and removes comments.
- ///
- /// The CSS string.
- /// The decoded CSS string.
- protected static string DecodeCss(string css)
+ foreach (var style in removeStyles)
{
- var r = CssUnicodeEscapes.Replace(css, m =>
- {
- if (m.Groups[1].Success)
- return ((char)int.Parse(m.Groups[1].Value, NumberStyles.HexNumber)).ToString();
- var t = m.Groups[2].Value;
- return t == "\\" ? @"\\" : t;
- });
+ RemoveStyle(element, styles, style.Item1, style.Item2);
+ }
+ }
- r = CssComments.Replace(r, m => "");
+ ///
+ /// Decodes CSS Unicode escapes and removes comments.
+ ///
+ /// The CSS string.
+ /// The decoded CSS string.
+ protected static string DecodeCss(string css)
+ {
+ var r = CssUnicodeEscapes.Replace(css, m =>
+ {
+ if (m.Groups[1].Success)
+ return ((char)int.Parse(m.Groups[1].Value, NumberStyles.HexNumber)).ToString();
+ var t = m.Groups[2].Value;
+ return t == "\\" ? @"\\" : t;
+ });
- return r;
- }
+ r = CssComments.Replace(r, m => "");
- private static readonly Regex SchemeRegex = new(@"^([^\/#]*?)(?:\:|*58|*3a)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
+ return r;
+ }
- ///
- /// Tries to create a safe object from a string.
- ///
- /// The URL.
- /// The object or null if no safe can be created.
- protected Iri? GetSafeIri(string url)
- {
- url = url.TrimStart();
+ private static readonly Regex SchemeRegex = new(@"^([^\/#]*?)(?:\:|*58|*3a)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
- var schemeMatch = SchemeRegex.Match(url);
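+ /// <summary>
+ /// Tries to create a safe <see cref="Iri"/> object from a string.
+ /// </summary>
+ /// <param name="url">The URL.</param>
+ /// <returns>The <see cref="Iri"/> object or null if no safe <see cref="Iri"/> can be created.</returns>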
+ protected Iri? GetSafeIri(string url)
+ {
+ url = url.TrimStart();
- if (schemeMatch.Success)
- {
- var scheme = schemeMatch.Groups[1].Value;
- return AllowedSchemes.Contains(scheme, StringComparer.OrdinalIgnoreCase) ? new Iri(url, scheme) : null;
- }
+ var schemeMatch = SchemeRegex.Match(url);
- return new Iri(url);
+ if (schemeMatch.Success)
+ {
+ var scheme = schemeMatch.Groups[1].Value;
+ return AllowedSchemes.Contains(scheme, StringComparer.OrdinalIgnoreCase) ? new Iri(url, scheme) : null;
}
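- /// <summary>
- /// Sanitizes a URL.
- /// </summary>
- /// <param name="element">The tag containing the URL being sanitized.</param>
- /// <param name="url">The URL.</param>
- /// <param name="baseUrl">The base URL relative URLs are resolved against (empty or null for no resolution).</param>
- /// <returns>The sanitized URL or null if no safe URL can be created.</returns>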
- protected virtual string? SanitizeUrl(IElement element, string url, string baseUrl)
- {
- var iri = GetSafeIri(url);
+ return new Iri(url);
+ }
+
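+ /// <summary>
+ /// Sanitizes a URL.
+ /// </summary>
+ /// <param name="element">The tag containing the URL being sanitized.</param>
+ /// <param name="url">The URL.</param>
+ /// <param name="baseUrl">The base URL relative URLs are resolved against (empty or null for no resolution).</param>
+ /// <returns>The sanitized URL or null if no safe URL can be created.</returns>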
+ protected virtual string? SanitizeUrl(IElement element, string url, string baseUrl)
+ {
+ var iri = GetSafeIri(url);
- if (iri != null && !iri.IsAbsolute && !string.IsNullOrEmpty(baseUrl))
+ if (iri != null && !iri.IsAbsolute && !string.IsNullOrEmpty(baseUrl))
+ {
+ // resolve relative URI
+ if (Uri.TryCreate(baseUrl, UriKind.Absolute, out Uri baseUri))
{
- // resolve relative URI
- if (Uri.TryCreate(baseUrl, UriKind.Absolute, out Uri baseUri))
+ try
{
- try
- {
- var sanitizedUrl = new Uri(baseUri, iri.Value).AbsoluteUri;
- var ev = new FilterUrlEventArgs(element, url, sanitizedUrl);
+ var sanitizedUrl = new Uri(baseUri, iri.Value).AbsoluteUri;
+ var ev = new FilterUrlEventArgs(element, url, sanitizedUrl);
- OnFilteringUrl(ev);
+ OnFilteringUrl(ev);
- return ev.SanitizedUrl;
- }
- catch (UriFormatException)
- {
- iri = null;
- }
+ return ev.SanitizedUrl;
+ }
+ catch (UriFormatException)
+ {
+ iri = null;
}
- else iri = null;
}
+ else iri = null;
+ }
- var e = new FilterUrlEventArgs(element, url, iri?.Value);
- OnFilteringUrl(e);
+ var e = new FilterUrlEventArgs(element, url, iri?.Value);
+ OnFilteringUrl(e);
- return e.SanitizedUrl;
- }
+ return e.SanitizedUrl;
+ }
- /// <summary>
- /// Removes a tag from the document.
- /// </summary>
- /// <param name="tag">Tag to be removed.</param>
- /// <param name="reason">Reason for removal.</param>
- private void RemoveTag(IElement tag, RemoveReason reason)
- {
- var e = new RemovingTagEventArgs(tag, reason);
- OnRemovingTag(e);
+ /// <summary>
+ /// Removes a tag from the document.
+ /// </summary>
+ /// <param name="tag">Tag to be removed.</param>
+ /// <param name="reason">Reason for removal.</param>
+ private void RemoveTag(IElement tag, RemoveReason reason)
+ {
+ var e = new RemovingTagEventArgs(tag, reason);
+ OnRemovingTag(e);
- if (!e.Cancel)
- {
- if (KeepChildNodes && tag.HasChildNodes)
- tag.Replace([.. tag.ChildNodes]);
- else
- tag.Remove();
- }
+ if (!e.Cancel)
+ {
+ if (KeepChildNodes && tag.HasChildNodes)
+ tag.Replace([.. tag.ChildNodes]);
+ else
+ tag.Remove();
}
+ }
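- /// <summary>
- /// Removes an attribute from the document.
- /// </summary>
- /// <param name="tag">Tag the attribute belongs to.</param>
- /// <param name="attribute">Attribute to be removed.</param>
- /// <param name="reason">Reason for removal.</param>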
- private void RemoveAttribute(IElement tag, IAttr attribute, RemoveReason reason)
- {
- var e = new RemovingAttributeEventArgs(tag, attribute, reason);
- OnRemovingAttribute(e);
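+ /// <summary>
+ /// Removes an attribute from the document.
+ /// </summary>
+ /// <param name="tag">Tag the attribute belongs to.</param>
+ /// <param name="attribute">Attribute to be removed.</param>
+ /// <param name="reason">Reason for removal.</param>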
+ private void RemoveAttribute(IElement tag, IAttr attribute, RemoveReason reason)
+ {
+ var e = new RemovingAttributeEventArgs(tag, attribute, reason);
+ OnRemovingAttribute(e);
- if (!e.Cancel)
- tag.RemoveAttribute(attribute.Name);
- }
+ if (!e.Cancel)
+ tag.RemoveAttribute(attribute.Name);
+ }
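- /// <summary>
- /// Removes a style from the document.
- /// </summary>
- /// <param name="tag">Tag the style belongs to.</param>
- /// <param name="styles">Style rule that contains the style to be removed.</param>
- /// <param name="style">Style to be removed.</param>
- /// <param name="reason">Reason for removal.</param>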
- private void RemoveStyle(IElement tag, ICssStyleDeclaration styles, ICssProperty style, RemoveReason reason)
- {
- var e = new RemovingStyleEventArgs(tag, style, reason);
- OnRemovingStyle(e);
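+ /// <summary>
+ /// Removes a style from the document.
+ /// </summary>
+ /// <param name="tag">Tag the style belongs to.</param>
+ /// <param name="styles">Style rule that contains the style to be removed.</param>
+ /// <param name="style">Style to be removed.</param>
+ /// <param name="reason">Reason for removal.</param>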
+ private void RemoveStyle(IElement tag, ICssStyleDeclaration styles, ICssProperty style, RemoveReason reason)
+ {
+ var e = new RemovingStyleEventArgs(tag, style, reason);
+ OnRemovingStyle(e);
- if (!e.Cancel)
- styles.RemoveProperty(style.Name);
- }
+ if (!e.Cancel)
+ styles.RemoveProperty(style.Name);
+ }
- /// <summary>
- /// Removes an at-rule from the document.
- /// </summary>
- /// <param name="tag">Tag the style belongs to.</param>
- /// <param name="rule">Rule to be removed.</param>
- /// <returns>true, if the rule can be removed; false, otherwise.</returns>
- private bool RemoveAtRule(IElement tag, ICssRule rule)
- {
- var e = new RemovingAtRuleEventArgs(tag, rule);
- OnRemovingAtRule(e);
+ /// <summary>
+ /// Removes an at-rule from the document.
+ /// </summary>
+ /// <param name="tag">Tag the style belongs to.</param>
+ /// <param name="rule">Rule to be removed.</param>
+ /// <returns>true, if the rule can be removed; false, otherwise.</returns>
+ private bool RemoveAtRule(IElement tag, ICssRule rule)
+ {
+ var e = new RemovingAtRuleEventArgs(tag, rule);
+ OnRemovingAtRule(e);
- return !e.Cancel;
- }
+ return !e.Cancel;
+ }
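- /// <summary>
- /// Removes a CSS class from a class attribute.
- /// </summary>
- /// <param name="tag">Tag the style belongs to.</param>
- /// <param name="cssClass">Class to be removed.</param>
- /// <param name="reason">Reason for removal.</param>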
- private void RemoveCssClass(IElement tag, string cssClass, RemoveReason reason)
- {
- var e = new RemovingCssClassEventArgs(tag, cssClass, reason);
- OnRemovingCssClass(e);
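+ /// <summary>
+ /// Removes a CSS class from a class attribute.
+ /// </summary>
+ /// <param name="tag">Tag the style belongs to.</param>
+ /// <param name="cssClass">Class to be removed.</param>
+ /// <param name="reason">Reason for removal.</param>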
+ private void RemoveCssClass(IElement tag, string cssClass, RemoveReason reason)
+ {
+ var e = new RemovingCssClassEventArgs(tag, cssClass, reason);
+ OnRemovingCssClass(e);
- if (!e.Cancel)
- tag.ClassList.Remove(cssClass);
- }
+ if (!e.Cancel)
+ tag.ClassList.Remove(cssClass);
}
}
diff --git a/src/HtmlSanitizer/HtmlSanitizerDefaults.cs b/src/HtmlSanitizer/HtmlSanitizerDefaults.cs
index 90ee373..084f8c1 100644
--- a/src/HtmlSanitizer/HtmlSanitizerDefaults.cs
+++ b/src/HtmlSanitizer/HtmlSanitizerDefaults.cs
@@ -3,362 +3,361 @@
using System.Collections.Generic;
using System.Collections.Immutable;
-namespace Ganss.Xss
+namespace Ganss.Xss;
+
+/// <summary>
+/// Default options.
+/// </summary>
+public static class HtmlSanitizerDefaults
{
/// <summary>
- /// Default options.
+ /// The default allowed CSS at-rules.
/// </summary>
- public static class HtmlSanitizerDefaults
+ public static ISet<CssRuleType> AllowedAtRules { get; } = new HashSet<CssRuleType>()
{
- /// <summary>
- /// The default allowed CSS at-rules.
- /// </summary>
- public static ISet<CssRuleType> AllowedAtRules { get; } = new HashSet<CssRuleType>()
- {
- CssRuleType.Style, CssRuleType.Namespace
- }.ToImmutableHashSet();
+ CssRuleType.Style, CssRuleType.Namespace
+ }.ToImmutableHashSet();
- /// <summary>
- /// The default allowed URI schemes.
- /// </summary>
- public static ISet<string> AllowedSchemes { get; } = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
- {
- "http", "https"
- }.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);
+ /// <summary>
+ /// The default allowed URI schemes.
+ /// </summary>
+ public static ISet<string> AllowedSchemes { get; } = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
+ {
+ "http", "https"
+ }.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);
- /// <summary>
- /// The default allowed HTML tag names.
- /// </summary>
- public static ISet<string> AllowedTags { get; } = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
- {
- // https://developer.mozilla.org/en/docs/Web/Guide/HTML/HTML5/HTML5_element_list
- "a", "abbr", "acronym", "address", "area", "b",
- "big", "blockquote", "br", "button", "caption", "center", "cite",
- "code", "col", "colgroup", "dd", "del", "dfn", "dir", "div", "dl", "dt",
- "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
- "hr", "i", "img", "input", "ins", "kbd", "label", "legend", "li", "map",
- "menu", "ol", "optgroup", "option", "p", "pre", "q", "s", "samp",
- "select", "small", "span", "strike", "strong", "sub", "sup", "table",
- "tbody", "td", "textarea", "tfoot", "th", "thead", "tr", "tt", "u",
- "ul", "var",
- // HTML5
- // Sections
- "section", "nav", "article", "aside", "header", "footer", "main",
- // Grouping content
- "figure", "figcaption",
- // Text-level semantics
- "data", "time", "mark", "ruby", "rt", "rp", "bdi", "wbr",
- // Forms
- "datalist", "keygen", "output", "progress", "meter",
- // Interactive elements
- "details", "summary", "menuitem",
- // document elements
- "html", "head", "body"
- }.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);
+ /// <summary>
+ /// The default allowed HTML tag names.
+ /// </summary>
+ public static ISet<string> AllowedTags { get; } = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
+ {
+ // https://developer.mozilla.org/en/docs/Web/Guide/HTML/HTML5/HTML5_element_list
+ "a", "abbr", "acronym", "address", "area", "b",
+ "big", "blockquote", "br", "button", "caption", "center", "cite",
+ "code", "col", "colgroup", "dd", "del", "dfn", "dir", "div", "dl", "dt",
+ "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
+ "hr", "i", "img", "input", "ins", "kbd", "label", "legend", "li", "map",
+ "menu", "ol", "optgroup", "option", "p", "pre", "q", "s", "samp",
+ "select", "small", "span", "strike", "strong", "sub", "sup", "table",
+ "tbody", "td", "textarea", "tfoot", "th", "thead", "tr", "tt", "u",
+ "ul", "var",
+ // HTML5
+ // Sections
+ "section", "nav", "article", "aside", "header", "footer", "main",
+ // Grouping content
+ "figure", "figcaption",
+ // Text-level semantics
+ "data", "time", "mark", "ruby", "rt", "rp", "bdi", "wbr",
+ // Forms
+ "datalist", "keygen", "output", "progress", "meter",
+ // Interactive elements
+ "details", "summary", "menuitem",
+ // document elements
+ "html", "head", "body"
+ }.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);
- /// <summary>
- /// The default allowed HTML attributes.
- /// </summary>
- public static ISet<string> AllowedAttributes { get; } = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
- {
- // https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
- "abbr", "accept", "accept-charset", "accesskey",
- "action", "align", "alt", "axis", "bgcolor", "border", "cellpadding",
- "cellspacing", "char", "charoff", "charset", "checked", "cite", /* "class", */
- "clear", "cols", "colspan", "color", "compact", "coords", "datetime",
- "dir", "disabled", "enctype", "for", "frame", "headers", "height",
- "href", "hreflang", "hspace", /* "id", */ "ismap", "label", "lang",
- "longdesc", "maxlength", "media", "method", "multiple", "name",
- "nohref", "noshade", "nowrap", "prompt", "readonly", "rel", "rev",
- "rows", "rowspan", "rules", "scope", "selected", "shape", "size",
- "span", "src", "start", "style", "summary", "tabindex", "target", "title",
- "type", "usemap", "valign", "value", "vspace", "width",
- // HTML5
- "high", // <meter>
- "keytype", // <keygen>
- "list", // <input>
- "low", // <meter>
- "max", // <meter>, <input>, <progress>