From d8dcf0b674ffaa341a42b659581976bf427fa5d0 Mon Sep 17 00:00:00 2001 From: Michael Ganss Date: Tue, 28 Jan 2025 12:10:26 +0100 Subject: [PATCH] Add FilterUrl to IHtmlSanitizer (fixes #565) Use file scoped namespace Implement VS suggestions Bump version to 9.0 --- appveyor.yml | 4 +- src/HtmlSanitizer/EventArgs.cs | 455 +++-- src/HtmlSanitizer/HtmlFormatter.cs | 107 +- src/HtmlSanitizer/HtmlSanitizer.cs | 1603 ++++++++--------- src/HtmlSanitizer/HtmlSanitizerDefaults.cs | 695 ++++--- src/HtmlSanitizer/HtmlSanitizerOptions.cs | 99 +- src/HtmlSanitizer/IHtmlSanitizer.cs | 336 ++-- src/HtmlSanitizer/Iri.cs | 64 +- src/HtmlSanitizer/RemoveReason.cs | 75 +- .../HtmlSanitizer.Benchmark.csproj | 2 +- .../HtmlSanitizer.Tests.csproj | 3 +- test/HtmlSanitizer.Tests/Tests.cs | 4 +- 12 files changed, 1718 insertions(+), 1729 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 969a302a..b869394d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,4 @@ -version: 8.1.{build} +version: 9.0.{build} skip_tags: true image: Visual Studio 2022 environment: @@ -19,7 +19,7 @@ test_script: dotnet sonarscanner begin /k:"mganss_HtmlSanitizer" /v:$env:APPVEYOR_BUILD_VERSION /o:"mganss-github" /d:sonar.host.url="https://sonarcloud.io" /d:sonar.login="$env:sonar_token" /d:sonar.cs.opencover.reportsPaths="$($env:APPVEYOR_BUILD_FOLDER)\coverage.xml" /d:sonar.coverage.exclusions="**/Program.cs" dotnet build } - - dotnet test /p:CollectCoverage=true test\HtmlSanitizer.Tests\HtmlSanitizer.Tests.csproj -f net6.0 + - dotnet test /p:CollectCoverage=true test\HtmlSanitizer.Tests\HtmlSanitizer.Tests.csproj -f net8.0 - ps: cp coverage.*.xml ./coverage.xml - ps: | if (-not $env:APPVEYOR_PULL_REQUEST_NUMBER) { diff --git a/src/HtmlSanitizer/EventArgs.cs b/src/HtmlSanitizer/EventArgs.cs index a36c7690..8f2ba991 100644 --- a/src/HtmlSanitizer/EventArgs.cs +++ b/src/HtmlSanitizer/EventArgs.cs @@ -5,271 +5,270 @@ using System.Collections.Generic; using System.ComponentModel; -namespace Ganss.Xss +namespace Ganss.Xss; + +/// +/// Provides data for the event. +/// +/// +/// Initializes a new instance of the class. +/// +public class PostProcessDomEventArgs(IHtmlDocument document) : EventArgs { /// - /// Provides data for the event. + /// Gets the document. /// - /// - /// Initializes a new instance of the class. - /// - public class PostProcessDomEventArgs(IHtmlDocument document) : EventArgs - { - /// - /// Gets the document. - /// - /// - /// The document. - /// - public IHtmlDocument Document { get; private set; } = document; - } + /// + /// The document. + /// + public IHtmlDocument Document { get; private set; } = document; +} +/// +/// Provides data for the event. +/// +/// +/// Initializes a new instance of the class. +/// +public class PostProcessNodeEventArgs(IHtmlDocument document, INode node) : EventArgs +{ /// - /// Provides data for the event. + /// Gets the document. /// - /// - /// Initializes a new instance of the class. - /// - public class PostProcessNodeEventArgs(IHtmlDocument document, INode node) : EventArgs - { - /// - /// Gets the document. - /// - /// - /// The document. - /// - public IHtmlDocument Document { get; private set; } = document; + /// + /// The document. + /// + public IHtmlDocument Document { get; private set; } = document; - /// - /// Gets the DOM node to be processed. - /// - /// - /// The DOM node. - /// - public INode Node { get; private set; } = node; + /// + /// Gets the DOM node to be processed. + /// + /// + /// The DOM node. + /// + public INode Node { get; private set; } = node; - /// - /// Gets the replacement nodes. Leave empty if no replacement should occur. - /// - /// - /// The replacement nodes. - /// - public ICollection ReplacementNodes { get; private set; } = new List(); - } + /// + /// Gets the replacement nodes. Leave empty if no replacement should occur. + /// + /// + /// The replacement nodes. + /// + public ICollection ReplacementNodes { get; private set; } = []; +} +/// +/// Provides data for the event. +/// +/// +/// Initializes a new instance of the class. +/// +/// The element to be removed. +/// The reason why the tag will be removed. +public class RemovingTagEventArgs(IElement tag, RemoveReason reason) : CancelEventArgs +{ /// - /// Provides data for the event. + /// Gets the tag to be removed. /// - /// - /// Initializes a new instance of the class. - /// - /// The element to be removed. - /// The reason why the tag will be removed. - public class RemovingTagEventArgs(IElement tag, RemoveReason reason) : CancelEventArgs - { - /// - /// Gets the tag to be removed. - /// - /// - /// The tag. - /// - public IElement Tag { get; private set; } = tag; + /// + /// The tag. + /// + public IElement Tag { get; private set; } = tag; - /// - /// Gets the reason why the tag will be removed. - /// - /// - /// The reason. - /// - public RemoveReason Reason { get; private set; } = reason; - } + /// + /// Gets the reason why the tag will be removed. + /// + /// + /// The reason. + /// + public RemoveReason Reason { get; private set; } = reason; +} +/// +/// Provides data for the event. +/// +/// +/// Initializes a new instance of the class. +/// +/// The element containing the attribute. +/// The attribute to be removed. +/// The reason why the attribute will be removed. +public class RemovingAttributeEventArgs(IElement tag, IAttr attribute, RemoveReason reason) : CancelEventArgs +{ /// - /// Provides data for the event. + /// Gets the tag containing the attribute to be removed. /// - /// - /// Initializes a new instance of the class. - /// - /// The element containing the attribute. - /// The attribute to be removed. - /// The reason why the attribute will be removed. - public class RemovingAttributeEventArgs(IElement tag, IAttr attribute, RemoveReason reason) : CancelEventArgs - { - /// - /// Gets the tag containing the attribute to be removed. - /// - /// - /// The tag. - /// - public IElement Tag { get; private set; } = tag; + /// + /// The tag. + /// + public IElement Tag { get; private set; } = tag; - /// - /// Gets the attribute to be removed. - /// - /// - /// The attribute. - /// - public IAttr Attribute { get; private set; } = attribute; + /// + /// Gets the attribute to be removed. + /// + /// + /// The attribute. + /// + public IAttr Attribute { get; private set; } = attribute; - /// - /// Gets the reason why the attribute will be removed. - /// - /// - /// The reason. - /// - public RemoveReason Reason { get; private set; } = reason; - } + /// + /// Gets the reason why the attribute will be removed. + /// + /// + /// The reason. + /// + public RemoveReason Reason { get; private set; } = reason; +} +/// +/// Provides data for the event. +/// +/// +/// Initializes a new instance of the class. +/// +/// The element containing the attribute. +/// The style to be removed. +/// The reason why the attribute will be removed. +public class RemovingStyleEventArgs(IElement tag, ICssProperty style, RemoveReason reason) : CancelEventArgs +{ /// - /// Provides data for the event. + /// Gets the tag containing the style to be removed. /// - /// - /// Initializes a new instance of the class. - /// - /// The element containing the attribute. - /// The style to be removed. - /// The reason why the attribute will be removed. - public class RemovingStyleEventArgs(IElement tag, ICssProperty style, RemoveReason reason) : CancelEventArgs - { - /// - /// Gets the tag containing the style to be removed. - /// - /// - /// The tag. - /// - public IElement Tag { get; private set; } = tag; + /// + /// The tag. + /// + public IElement Tag { get; private set; } = tag; - /// - /// Gets the style to be removed. - /// - /// - /// The style. - /// - public ICssProperty Style { get; private set; } = style; + /// + /// Gets the style to be removed. + /// + /// + /// The style. + /// + public ICssProperty Style { get; private set; } = style; - /// - /// Gets the reason why the style will be removed. - /// - /// - /// The reason. - /// - public RemoveReason Reason { get; private set; } = reason; - } + /// + /// Gets the reason why the style will be removed. + /// + /// + /// The reason. + /// + public RemoveReason Reason { get; private set; } = reason; +} +/// +/// Provides data for the event. +/// +/// +/// Initializes a new instance of the class. +/// +/// The element containing the attribute. +/// The rule to be removed. +public class RemovingAtRuleEventArgs(IElement tag, ICssRule rule) : CancelEventArgs +{ /// - /// Provides data for the event. + /// Gets the tag containing the at-rule to be removed. /// - /// - /// Initializes a new instance of the class. - /// - /// The element containing the attribute. - /// The rule to be removed. - public class RemovingAtRuleEventArgs(IElement tag, ICssRule rule) : CancelEventArgs - { - /// - /// Gets the tag containing the at-rule to be removed. - /// - /// - /// The tag. - /// - public IElement Tag { get; private set; } = tag; + /// + /// The tag. + /// + public IElement Tag { get; private set; } = tag; - /// - /// Gets the rule to be removed. - /// - /// - /// The rule. - /// - public ICssRule Rule { get; private set; } = rule; - } + /// + /// Gets the rule to be removed. + /// + /// + /// The rule. + /// + public ICssRule Rule { get; private set; } = rule; +} +/// +/// Provides data for the event. +/// +/// +/// Initializes a new instance of the class. +/// +/// The comment to be removed. +public class RemovingCommentEventArgs(IComment comment) : CancelEventArgs +{ /// - /// Provides data for the event. + /// Gets the comment node to be removed. /// - /// - /// Initializes a new instance of the class. - /// - /// The comment to be removed. - public class RemovingCommentEventArgs(IComment comment) : CancelEventArgs - { - /// - /// Gets the comment node to be removed. - /// - /// - /// The comment node. - /// - public IComment Comment { get; private set; } = comment; - } + /// + /// The comment node. + /// + public IComment Comment { get; private set; } = comment; +} +/// +/// Provides data for the event. +/// +/// +/// Initializes a new instance of the class. +/// +/// The element containing the attribute. +/// The CSS class to be removed. +/// The reason why the attribute will be removed. +public class RemovingCssClassEventArgs(IElement tag, string cssClass, RemoveReason reason) : CancelEventArgs +{ /// - /// Provides data for the event. + /// Gets the tag containing the CSS class to be removed. /// - /// - /// Initializes a new instance of the class. - /// - /// The element containing the attribute. - /// The CSS class to be removed. - /// The reason why the attribute will be removed. - public class RemovingCssClassEventArgs(IElement tag, string cssClass, RemoveReason reason) : CancelEventArgs - { - /// - /// Gets the tag containing the CSS class to be removed. - /// - /// - /// The tag. - /// - public IElement Tag { get; private set; } = tag; + /// + /// The tag. + /// + public IElement Tag { get; private set; } = tag; - /// - /// Gets the CSS class to be removed. - /// - /// - /// The CSS class. - /// - public string CssClass { get; private set; } = cssClass; + /// + /// Gets the CSS class to be removed. + /// + /// + /// The CSS class. + /// + public string CssClass { get; private set; } = cssClass; - /// - /// Gets the reason why the CSS class will be removed. - /// - /// - /// The reason. - /// - public RemoveReason Reason { get; private set; } = reason; - } + /// + /// Gets the reason why the CSS class will be removed. + /// + /// + /// The reason. + /// + public RemoveReason Reason { get; private set; } = reason; +} +/// +/// Provides data for the event. +/// +/// +/// Initializes a new instance of the class. +/// +/// The tag containing the URI being sanitized. +/// The original URL. +/// The sanitized URL. +public class FilterUrlEventArgs(IElement tag, string originalUrl, string? sanitizedUrl = null) : EventArgs +{ /// - /// Provides data for the event. + /// Gets the original URL. /// - /// - /// Initializes a new instance of the class. - /// - /// The tag containing the URI being sanitized. - /// The original URL. - /// The sanitized URL. - public class FilterUrlEventArgs(IElement tag, string originalUrl, string? sanitizedUrl = null) : EventArgs - { - /// - /// Gets the original URL. - /// - /// - /// The original URL. - /// - public string OriginalUrl { get; private set; } = originalUrl; + /// + /// The original URL. + /// + public string OriginalUrl { get; private set; } = originalUrl; - /// - /// Gets or sets the sanitized URL. - /// - /// - /// The sanitized URL. If it is null, it will be removed. - /// - public string? SanitizedUrl { get; set; } = sanitizedUrl; + /// + /// Gets or sets the sanitized URL. + /// + /// + /// The sanitized URL. If it is null, it will be removed. + /// + public string? SanitizedUrl { get; set; } = sanitizedUrl; - /// - /// Gets the tag containing the URI being sanitized. - /// - /// - /// The tag. - /// - public IElement Tag { get; private set; } = tag; - } + /// + /// Gets the tag containing the URI being sanitized. + /// + /// + /// The tag. + /// + public IElement Tag { get; private set; } = tag; } diff --git a/src/HtmlSanitizer/HtmlFormatter.cs b/src/HtmlSanitizer/HtmlFormatter.cs index 425c5940..7c0a55f7 100644 --- a/src/HtmlSanitizer/HtmlFormatter.cs +++ b/src/HtmlSanitizer/HtmlFormatter.cs @@ -1,74 +1,69 @@ -using AngleSharp; -using AngleSharp.Html; +using AngleSharp.Html; using AngleSharp.Dom; using System; -using System.Collections.Generic; -using System.Linq; using System.Text; -using System.Threading.Tasks; -namespace Ganss.Xss +namespace Ganss.Xss; + +/// +/// HTML5 markup formatter. Identical to except for < and > which are +/// encoded in attribute values. +/// +public class HtmlFormatter: HtmlMarkupFormatter { /// - /// HTML5 markup formatter. Identical to except for < and > which are - /// encoded in attribute values. + /// An instance of . /// - public class HtmlFormatter: HtmlMarkupFormatter - { - /// - /// An instance of . - /// - new public static readonly HtmlFormatter Instance = new (); + new public static readonly HtmlFormatter Instance = new (); - // disable XML comments warnings - #pragma warning disable 1591 + // disable XML comments warnings + #pragma warning disable 1591 - protected override string Attribute(IAttr attr) - { - var namespaceUri = attr.NamespaceUri; - var localName = attr.LocalName; - var value = attr.Value; - var temp = new StringBuilder(); + protected override string Attribute(IAttr attr) + { + var namespaceUri = attr.NamespaceUri; + var localName = attr.LocalName; + var value = attr.Value; + var temp = new StringBuilder(); - if (String.IsNullOrEmpty(namespaceUri)) - { - temp.Append(localName); - } - else if (namespaceUri == NamespaceNames.XmlUri) - { - temp.Append(NamespaceNames.XmlPrefix).Append(':').Append(localName); - } - else if (namespaceUri == NamespaceNames.XLinkUri) - { - temp.Append(NamespaceNames.XLinkPrefix).Append(':').Append(localName); - } - else if (namespaceUri == NamespaceNames.XmlNsUri) - { - temp.Append(XmlNamespaceLocalName(localName)); - } - else - { - temp.Append(attr.Name); - } + if (String.IsNullOrEmpty(namespaceUri)) + { + temp.Append(localName); + } + else if (namespaceUri == NamespaceNames.XmlUri) + { + temp.Append(NamespaceNames.XmlPrefix).Append(':').Append(localName); + } + else if (namespaceUri == NamespaceNames.XLinkUri) + { + temp.Append(NamespaceNames.XLinkPrefix).Append(':').Append(localName); + } + else if (namespaceUri == NamespaceNames.XmlNsUri) + { + temp.Append(XmlNamespaceLocalName(localName)); + } + else + { + temp.Append(attr.Name); + } - temp.Append('=').Append('"'); + temp.Append('=').Append('"'); - for (var i = 0; i < value.Length; i++) + for (var i = 0; i < value.Length; i++) + { + switch (value[i]) { - switch (value[i]) - { - case '&': temp.Append("&"); break; - case '\u00a0': temp.Append(" "); break; - case '"': temp.Append("""); break; - case '<': temp.Append("<"); break; - case '>': temp.Append(">"); break; - default: temp.Append(value[i]); break; - } + case '&': temp.Append("&"); break; + case '\u00a0': temp.Append(" "); break; + case '"': temp.Append("""); break; + case '<': temp.Append("<"); break; + case '>': temp.Append(">"); break; + default: temp.Append(value[i]); break; } - - return temp.Append('"').ToString(); } - #pragma warning restore 1591 + return temp.Append('"').ToString(); } + + #pragma warning restore 1591 } diff --git a/src/HtmlSanitizer/HtmlSanitizer.cs b/src/HtmlSanitizer/HtmlSanitizer.cs index d6125810..a8027861 100644 --- a/src/HtmlSanitizer/HtmlSanitizer.cs +++ b/src/HtmlSanitizer/HtmlSanitizer.cs @@ -13,969 +13,968 @@ using System.Text; using System.Text.RegularExpressions; -namespace Ganss.Xss +namespace Ganss.Xss; + +/// +/// Cleans HTML documents and fragments from constructs that can lead to XSS attacks. +/// +/// +/// XSS attacks can occur at several levels within an HTML document or fragment: +/// +/// HTML tags (e.g. the <script> tag) +/// HTML attributes (e.g. the "onload" attribute) +/// CSS styles (url property values) +/// malformed HTML or HTML that exploits parser bugs in specific browsers +/// +/// +/// The HtmlSanitizer class addresses all of these possible attack vectors by using a sophisticated HTML parser (AngleSharp). +/// +/// +/// In order to facilitate different use cases, HtmlSanitizer can be customized at the levels mentioned above: +/// +/// You can specify the allowed HTML tags through the property . All other tags will be stripped. +/// You can specify the allowed HTML attributes through the property . All other attributes will be stripped. +/// You can specify the allowed CSS property names through the property . All other styles will be stripped. +/// You can specify the allowed URI schemes through the property . All other URIs will be stripped. +/// You can specify the HTML attributes that contain URIs (such as "src", "href" etc.) through the property . +/// +/// +/// +/// +/// +/// alert('xss')
Test
"; +/// var sanitized = sanitizer.Sanitize(html, "http://www.example.com"); +/// // -> "
Test
" +/// ]]> +///
+///
+public class HtmlSanitizer : IHtmlSanitizer { + private const string StyleAttributeName = "style"; + + // from http://genshi.edgewall.org/ + private static readonly Regex CssUnicodeEscapes = new(@"\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'""{};:()#*])", RegexOptions.Compiled); + private static readonly Regex CssComments = new(@"/\*.*?\*/", RegexOptions.Compiled); + // IE6 + private static readonly Regex CssExpression = new(@"[eE\uFF25\uFF45][xX\uFF38\uFF58][pP\uFF30\uFF50][rR\u0280\uFF32\uFF52][eE\uFF25\uFF45][sS\uFF33\uFF53]{2}[iI\u026A\uFF29\uFF49][oO\uFF2F\uFF4F][nN\u0274\uFF2E\uFF4E]", RegexOptions.Compiled); + private static readonly Regex CssUrl = new(@"[Uu][Rr\u0280][Ll\u029F]\((['""]?)([^'"")]+)(['""]?)", RegexOptions.Compiled); + private static readonly Regex WhitespaceRegex = new(@"\s*", RegexOptions.Compiled); + private static readonly IConfiguration defaultConfiguration = Configuration.Default.WithCss(new CssParserOptions + { + IsIncludingUnknownDeclarations = true, + IsIncludingUnknownRules = true, + IsToleratingInvalidSelectors = true, + }); + + private static readonly HtmlParser defaultHtmlParser = new(new HtmlParserOptions { IsScripting = true }, BrowsingContext.New(defaultConfiguration)); + /// - /// Cleans HTML documents and fragments from constructs that can lead to XSS attacks. - /// - /// - /// XSS attacks can occur at several levels within an HTML document or fragment: - /// - /// HTML tags (e.g. the <script> tag) - /// HTML attributes (e.g. the "onload" attribute) - /// CSS styles (url property values) - /// malformed HTML or HTML that exploits parser bugs in specific browsers - /// - /// - /// The HtmlSanitizer class addresses all of these possible attack vectors by using a sophisticated HTML parser (AngleSharp). - /// - /// - /// In order to facilitate different use cases, HtmlSanitizer can be customized at the levels mentioned above: - /// - /// You can specify the allowed HTML tags through the property . All other tags will be stripped. - /// You can specify the allowed HTML attributes through the property . All other attributes will be stripped. - /// You can specify the allowed CSS property names through the property . All other styles will be stripped. - /// You can specify the allowed URI schemes through the property . All other URIs will be stripped. - /// You can specify the HTML attributes that contain URIs (such as "src", "href" etc.) through the property . - /// - /// - /// - /// - /// - /// alert('xss')
Test
"; - /// var sanitized = sanitizer.Sanitize(html, "http://www.example.com"); - /// // -> "
Test
" - /// ]]> - ///
- ///
- public class HtmlSanitizer : IHtmlSanitizer - { - private const string StyleAttributeName = "style"; - - // from http://genshi.edgewall.org/ - private static readonly Regex CssUnicodeEscapes = new(@"\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'""{};:()#*])", RegexOptions.Compiled); - private static readonly Regex CssComments = new(@"/\*.*?\*/", RegexOptions.Compiled); - // IE6 - private static readonly Regex CssExpression = new(@"[eE\uFF25\uFF45][xX\uFF38\uFF58][pP\uFF30\uFF50][rR\u0280\uFF32\uFF52][eE\uFF25\uFF45][sS\uFF33\uFF53]{2}[iI\u026A\uFF29\uFF49][oO\uFF2F\uFF4F][nN\u0274\uFF2E\uFF4E]", RegexOptions.Compiled); - private static readonly Regex CssUrl = new(@"[Uu][Rr\u0280][Ll\u029F]\((['""]?)([^'"")]+)(['""]?)", RegexOptions.Compiled); - private static readonly Regex WhitespaceRegex = new(@"\s*", RegexOptions.Compiled); - private static readonly IConfiguration defaultConfiguration = Configuration.Default.WithCss(new CssParserOptions - { - IsIncludingUnknownDeclarations = true, - IsIncludingUnknownRules = true, - IsToleratingInvalidSelectors = true, - }); + /// Initializes a new instance of the class + /// with the default options. + /// + public HtmlSanitizer() + { + AllowedTags = new HashSet(HtmlSanitizerDefaults.AllowedTags, StringComparer.OrdinalIgnoreCase); + AllowedSchemes = new HashSet(HtmlSanitizerDefaults.AllowedSchemes, StringComparer.OrdinalIgnoreCase); + AllowedAttributes = new HashSet(HtmlSanitizerDefaults.AllowedAttributes, StringComparer.OrdinalIgnoreCase); + UriAttributes = new HashSet(HtmlSanitizerDefaults.UriAttributes, StringComparer.OrdinalIgnoreCase); + AllowedCssProperties = new HashSet(HtmlSanitizerDefaults.AllowedCssProperties, StringComparer.OrdinalIgnoreCase); + AllowedAtRules = new HashSet(HtmlSanitizerDefaults.AllowedAtRules); + AllowedClasses = new HashSet(HtmlSanitizerDefaults.AllowedClasses); + } - private static readonly HtmlParser defaultHtmlParser = new(new HtmlParserOptions { IsScripting = true }, BrowsingContext.New(defaultConfiguration)); + /// + /// Initializes a new instance of the class + /// with the given options. + /// + /// Options to control the sanitizing. + public HtmlSanitizer(HtmlSanitizerOptions options) + { + AllowedTags = new HashSet(options.AllowedTags, StringComparer.OrdinalIgnoreCase); + AllowedSchemes = new HashSet(options.AllowedSchemes, StringComparer.OrdinalIgnoreCase); + AllowedAttributes = new HashSet(options.AllowedAttributes, StringComparer.OrdinalIgnoreCase); + UriAttributes = new HashSet(options.UriAttributes, StringComparer.OrdinalIgnoreCase); + AllowedClasses = new HashSet(options.AllowedCssClasses, StringComparer.OrdinalIgnoreCase); + AllowedCssProperties = new HashSet(options.AllowedCssProperties, StringComparer.OrdinalIgnoreCase); + AllowedAtRules = new HashSet(options.AllowedAtRules); + AllowCssCustomProperties = options.AllowCssCustomProperties; + AllowDataAttributes = options.AllowDataAttributes; + } - /// - /// Initializes a new instance of the class - /// with the default options. - /// - public HtmlSanitizer() - { - AllowedTags = new HashSet(HtmlSanitizerDefaults.AllowedTags, StringComparer.OrdinalIgnoreCase); - AllowedSchemes = new HashSet(HtmlSanitizerDefaults.AllowedSchemes, StringComparer.OrdinalIgnoreCase); - AllowedAttributes = new HashSet(HtmlSanitizerDefaults.AllowedAttributes, StringComparer.OrdinalIgnoreCase); - UriAttributes = new HashSet(HtmlSanitizerDefaults.UriAttributes, StringComparer.OrdinalIgnoreCase); - AllowedCssProperties = new HashSet(HtmlSanitizerDefaults.AllowedCssProperties, StringComparer.OrdinalIgnoreCase); - AllowedAtRules = new HashSet(HtmlSanitizerDefaults.AllowedAtRules); - AllowedClasses = new HashSet(HtmlSanitizerDefaults.AllowedClasses); - } + /// + /// Gets or sets the default method that encodes comments. + /// + public Action EncodeComment { get; set; } = DefaultEncodeComment; - /// - /// Initializes a new instance of the class - /// with the given options. - /// - /// Options to control the sanitizing. - public HtmlSanitizer(HtmlSanitizerOptions options) - { - AllowedTags = new HashSet(options.AllowedTags, StringComparer.OrdinalIgnoreCase); - AllowedSchemes = new HashSet(options.AllowedSchemes, StringComparer.OrdinalIgnoreCase); - AllowedAttributes = new HashSet(options.AllowedAttributes, StringComparer.OrdinalIgnoreCase); - UriAttributes = new HashSet(options.UriAttributes, StringComparer.OrdinalIgnoreCase); - AllowedClasses = new HashSet(options.AllowedCssClasses, StringComparer.OrdinalIgnoreCase); - AllowedCssProperties = new HashSet(options.AllowedCssProperties, StringComparer.OrdinalIgnoreCase); - AllowedAtRules = new HashSet(options.AllowedAtRules); - AllowCssCustomProperties = options.AllowCssCustomProperties; - AllowDataAttributes = options.AllowDataAttributes; - } + /// + /// Gets or sets the default method that encodes literal text content. + /// + public Action EncodeLiteralTextElementContent { get; set; } = DefaultEncodeLiteralTextElementContent; - /// - /// Gets or sets the default method that encodes comments. - /// - public Action EncodeComment { get; set; } = DefaultEncodeComment; - - /// - /// Gets or sets the default method that encodes literal text content. - /// - public Action EncodeLiteralTextElementContent { get; set; } = DefaultEncodeLiteralTextElementContent; - - /// - /// Gets or sets the default value indicating whether to keep child nodes of elements that are removed. Default is false. - /// - public static bool DefaultKeepChildNodes { get; set; } = false; - - /// - /// Gets or sets a value indicating whether to keep child nodes of elements that are removed. Default is . - /// - public bool KeepChildNodes { get; set; } = DefaultKeepChildNodes; - - /// - /// Gets or sets the default object that creates the parser used for parsing the input. - /// - public static Func DefaultHtmlParserFactory { get; set; } = () => defaultHtmlParser; - - /// - /// Gets or sets the object the creates the parser used for parsing the input. - /// - public Func HtmlParserFactory { get; set; } = DefaultHtmlParserFactory; - - /// - /// Gets or sets the default object used for generating output. Default is . - /// - public static IMarkupFormatter DefaultOutputFormatter { get; set; } = HtmlFormatter.Instance; - - /// - /// Gets or sets the object used for generating output. Default is . - /// - public IMarkupFormatter OutputFormatter { get; set; } = DefaultOutputFormatter; - - /// - /// Gets or sets the default object used for generating CSS output. Default is . - /// - public static IStyleFormatter DefaultStyleFormatter { get; set; } = CssStyleFormatter.Instance; - - /// - /// Gets or sets the object used for generating CSS output. Default is . - /// - public IStyleFormatter StyleFormatter { get; set; } = DefaultStyleFormatter; - - /// - /// Gets or sets the allowed CSS at-rules such as "@media" and "@font-face". - /// - /// - /// The allowed CSS at-rules. - /// - public ISet AllowedAtRules { get; private set; } - - /// - /// Gets or sets the allowed URI schemes such as "http" and "https". - /// - /// - /// The allowed URI schemes. - /// - public ISet AllowedSchemes { get; private set; } - - /// - /// Gets or sets the allowed HTML tag names such as "a" and "div". - /// - /// - /// The allowed tag names. - /// - public ISet AllowedTags { get; private set; } - - /// - /// Gets or sets the allowed HTML attributes such as "href" and "alt". - /// - /// - /// The allowed HTML attributes. - /// - public ISet AllowedAttributes { get; private set; } - - /// - /// Allow all HTML5 data attributes; the attributes prefixed with data-. - /// - public bool AllowDataAttributes { get; set; } - - /// - /// Gets or sets the HTML attributes that can contain a URI such as "href". - /// - /// - /// The URI attributes. - /// - public ISet UriAttributes { get; private set; } - - /// - /// Gets or sets the allowed CSS properties such as "font" and "margin". - /// - /// - /// The allowed CSS properties. - /// - public ISet AllowedCssProperties { get; private set; } - - /// - /// Allow all custom CSS properties (variables) prefixed with --. - /// - public bool AllowCssCustomProperties { get; set; } - - /// - /// Gets or sets a regex that must not match for legal CSS property values. - /// - /// - /// The regex. - /// - public Regex DisallowCssPropertyValue { get; set; } = DefaultDisallowedCssPropertyValue; - - /// - /// Gets or sets the allowed CSS classes. If the set is empty, all classes will be allowed. - /// - /// - /// The allowed CSS classes. An empty set means all classes are allowed. - /// - public ISet AllowedClasses { get; private set; } - - /// - /// Occurs after sanitizing the document and post processing nodes. - /// - public event EventHandler? PostProcessDom; - /// - /// Occurs for every node after sanitizing. - /// - public event EventHandler? PostProcessNode; - /// - /// Occurs before a tag is removed. - /// - public event EventHandler? RemovingTag; - /// - /// Occurs before an attribute is removed. - /// - public event EventHandler? RemovingAttribute; - /// - /// Occurs before a style is removed. - /// - public event EventHandler? RemovingStyle; - /// - /// Occurs before an at-rule is removed. - /// - public event EventHandler? RemovingAtRule; - /// - /// Occurs before a comment is removed. - /// - public event EventHandler? RemovingComment; - /// - /// Occurs before a CSS class is removed. - /// - public event EventHandler? RemovingCssClass; - /// - /// Occurs when a URL is being sanitized. - /// - public event EventHandler? FilterUrl; - - /// - /// Raises the event. - /// - /// The instance containing the event data. - protected virtual void OnPostProcessDom(PostProcessDomEventArgs e) - { - PostProcessDom?.Invoke(this, e); - } + /// + /// Gets or sets the default value indicating whether to keep child nodes of elements that are removed. Default is false. + /// + public static bool DefaultKeepChildNodes { get; set; } = false; - /// - /// Raises the event. - /// - /// The instance containing the event data. - protected virtual void OnPostProcessNode(PostProcessNodeEventArgs e) - { - PostProcessNode?.Invoke(this, e); - } + /// + /// Gets or sets a value indicating whether to keep child nodes of elements that are removed. Default is . + /// + public bool KeepChildNodes { get; set; } = DefaultKeepChildNodes; - /// - /// Raises the event. - /// - /// The instance containing the event data. - protected virtual void OnRemovingTag(RemovingTagEventArgs e) - { - RemovingTag?.Invoke(this, e); - } + /// + /// Gets or sets the default object that creates the parser used for parsing the input. + /// + public static Func DefaultHtmlParserFactory { get; set; } = () => defaultHtmlParser; - /// - /// Raises the event. - /// - /// The instance containing the event data. - protected virtual void OnRemovingAttribute(RemovingAttributeEventArgs e) - { - RemovingAttribute?.Invoke(this, e); - } + /// + /// Gets or sets the object the creates the parser used for parsing the input. + /// + public Func HtmlParserFactory { get; set; } = DefaultHtmlParserFactory; - /// - /// Raises the event. - /// - /// The instance containing the event data. - protected virtual void OnRemovingStyle(RemovingStyleEventArgs e) - { - RemovingStyle?.Invoke(this, e); - } + /// + /// Gets or sets the default object used for generating output. Default is . + /// + public static IMarkupFormatter DefaultOutputFormatter { get; set; } = HtmlFormatter.Instance; - /// - /// Raises the event. - /// - /// The instance containing the event data. - protected virtual void OnRemovingAtRule(RemovingAtRuleEventArgs e) - { - RemovingAtRule?.Invoke(this, e); - } + /// + /// Gets or sets the object used for generating output. Default is . + /// + public IMarkupFormatter OutputFormatter { get; set; } = DefaultOutputFormatter; - /// - /// Raises the event. - /// - /// The instance containing the event data. - protected virtual void OnRemovingComment(RemovingCommentEventArgs e) - { - RemovingComment?.Invoke(this, e); - } + /// + /// Gets or sets the default object used for generating CSS output. Default is . + /// + public static IStyleFormatter DefaultStyleFormatter { get; set; } = CssStyleFormatter.Instance; + + /// + /// Gets or sets the object used for generating CSS output. Default is . + /// + public IStyleFormatter StyleFormatter { get; set; } = DefaultStyleFormatter; - /// - /// The default regex for disallowed CSS property values. - /// - public static readonly Regex DefaultDisallowedCssPropertyValue = new(@"[<>]", RegexOptions.Compiled); + /// + /// Gets or sets the allowed CSS at-rules such as "@media" and "@font-face". + /// + /// + /// The allowed CSS at-rules. + /// + public ISet AllowedAtRules { get; private set; } - /// - /// Raises the event. - /// - /// The instance containing the event data. - protected virtual void OnRemovingCssClass(RemovingCssClassEventArgs e) - { - RemovingCssClass?.Invoke(this, e); - } + /// + /// Gets or sets the allowed URI schemes such as "http" and "https". + /// + /// + /// The allowed URI schemes. + /// + public ISet AllowedSchemes { get; private set; } + + /// + /// Gets or sets the allowed HTML tag names such as "a" and "div". + /// + /// + /// The allowed tag names. + /// + public ISet AllowedTags { get; private set; } + + /// + /// Gets or sets the allowed HTML attributes such as "href" and "alt". + /// + /// + /// The allowed HTML attributes. + /// + public ISet AllowedAttributes { get; private set; } + + /// + /// Allow all HTML5 data attributes; the attributes prefixed with data-. + /// + public bool AllowDataAttributes { get; set; } + + /// + /// Gets or sets the HTML attributes that can contain a URI such as "href". + /// + /// + /// The URI attributes. + /// + public ISet UriAttributes { get; private set; } + + /// + /// Gets or sets the allowed CSS properties such as "font" and "margin". + /// + /// + /// The allowed CSS properties. + /// + public ISet AllowedCssProperties { get; private set; } + + /// + /// Allow all custom CSS properties (variables) prefixed with --. + /// + public bool AllowCssCustomProperties { get; set; } + + /// + /// Gets or sets a regex that must not match for legal CSS property values. + /// + /// + /// The regex. + /// + public Regex DisallowCssPropertyValue { get; set; } = DefaultDisallowedCssPropertyValue; + + /// + /// Gets or sets the allowed CSS classes. If the set is empty, all classes will be allowed. + /// + /// + /// The allowed CSS classes. An empty set means all classes are allowed. + /// + public ISet AllowedClasses { get; private set; } + + /// + /// Occurs after sanitizing the document and post processing nodes. + /// + public event EventHandler? PostProcessDom; + /// + /// Occurs for every node after sanitizing. + /// + public event EventHandler? PostProcessNode; + /// + /// Occurs before a tag is removed. + /// + public event EventHandler? RemovingTag; + /// + /// Occurs before an attribute is removed. + /// + public event EventHandler? RemovingAttribute; + /// + /// Occurs before a style is removed. + /// + public event EventHandler? RemovingStyle; + /// + /// Occurs before an at-rule is removed. + /// + public event EventHandler? RemovingAtRule; + /// + /// Occurs before a comment is removed. + /// + public event EventHandler? RemovingComment; + /// + /// Occurs before a CSS class is removed. + /// + public event EventHandler? RemovingCssClass; + /// + /// Occurs when a URL is being sanitized. + /// + public event EventHandler? FilterUrl; + + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnPostProcessDom(PostProcessDomEventArgs e) + { + PostProcessDom?.Invoke(this, e); + } + + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnPostProcessNode(PostProcessNodeEventArgs e) + { + PostProcessNode?.Invoke(this, e); + } + + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnRemovingTag(RemovingTagEventArgs e) + { + RemovingTag?.Invoke(this, e); + } + + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnRemovingAttribute(RemovingAttributeEventArgs e) + { + RemovingAttribute?.Invoke(this, e); + } + + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnRemovingStyle(RemovingStyleEventArgs e) + { + RemovingStyle?.Invoke(this, e); + } + + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnRemovingAtRule(RemovingAtRuleEventArgs e) + { + RemovingAtRule?.Invoke(this, e); + } + + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnRemovingComment(RemovingCommentEventArgs e) + { + RemovingComment?.Invoke(this, e); + } + + /// + /// The default regex for disallowed CSS property values. + /// + public static readonly Regex DefaultDisallowedCssPropertyValue = new(@"[<>]", RegexOptions.Compiled); + + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnRemovingCssClass(RemovingCssClassEventArgs e) + { + RemovingCssClass?.Invoke(this, e); + } + + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnFilteringUrl(FilterUrlEventArgs e) + { + FilterUrl?.Invoke(this, e); + } - /// - /// Raises the event. - /// - /// The instance containing the event data. - protected virtual void OnFilteringUrl(FilterUrlEventArgs e) + /// + /// Return all nested subnodes of a node. The nodes are returned in DOM order. + /// + /// The root node. + /// All nested subnodes. + private static IEnumerable GetAllNodes(INode dom) + { + if (dom.ChildNodes.Length == 0) yield break; + + var s = new Stack(); + for (var i = dom.ChildNodes.Length - 1; i >= 0; i--) { - FilterUrl?.Invoke(this, e); + s.Push(dom.ChildNodes[i]); } - /// - /// Return all nested subnodes of a node. The nodes are returned in DOM order. - /// - /// The root node. - /// All nested subnodes. - private static IEnumerable GetAllNodes(INode dom) + while (s.Count > 0) { - if (dom.ChildNodes.Length == 0) yield break; + var n = s.Pop(); + yield return n; - var s = new Stack(); - for (var i = dom.ChildNodes.Length - 1; i >= 0; i--) + for (var i = n.ChildNodes.Length - 1; i >= 0; i--) { - s.Push(dom.ChildNodes[i]); - } - - while (s.Count > 0) - { - var n = s.Pop(); - yield return n; - - for (var i = n.ChildNodes.Length - 1; i >= 0; i--) - { - s.Push(n.ChildNodes[i]); - } + s.Push(n.ChildNodes[i]); } } + } - /// - /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned. - /// - /// The HTML body fragment to sanitize. - /// The base URL relative URLs are resolved against. No resolution if empty. - /// The formatter used to render the DOM. Using the if null. - /// The sanitized HTML body fragment. - public string Sanitize(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null) - { - using var dom = SanitizeDom(html, baseUrl); - if (dom.Body == null) return string.Empty; - var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter); + /// + /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned. + /// + /// The HTML body fragment to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The formatter used to render the DOM. Using the if null. + /// The sanitized HTML body fragment. + public string Sanitize(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null) + { + using var dom = SanitizeDom(html, baseUrl); + if (dom.Body == null) return string.Empty; + var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter); - return output; - } + return output; + } - /// - /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned. - /// - /// The HTML body fragment to sanitize. - /// The base URL relative URLs are resolved against. No resolution if empty. - /// The sanitized HTML document. - public IHtmlDocument SanitizeDom(string html, string baseUrl = "") - { - var parser = HtmlParserFactory(); - var dom = parser.ParseDocument("" + html); + /// + /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned. + /// + /// The HTML body fragment to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The sanitized HTML document. + public IHtmlDocument SanitizeDom(string html, string baseUrl = "") + { + var parser = HtmlParserFactory(); + var dom = parser.ParseDocument("" + html); - if (dom.Body != null) - DoSanitize(dom, dom.Body, baseUrl); + if (dom.Body != null) + DoSanitize(dom, dom.Body, baseUrl); - return dom; - } + return dom; + } - /// - /// Sanitizes the specified parsed HTML body fragment. - /// If the document has not been parsed with CSS support then all styles will be removed. - /// - /// The parsed HTML document. - /// The node within which to sanitize. - /// The base URL relative URLs are resolved against. No resolution if empty. - /// The sanitized HTML document. - public IHtmlDocument SanitizeDom(IHtmlDocument document, IHtmlElement? context = null, string baseUrl = "") - { - DoSanitize(document, context ?? (IParentNode)document, baseUrl); + /// + /// Sanitizes the specified parsed HTML body fragment. + /// If the document has not been parsed with CSS support then all styles will be removed. + /// + /// The parsed HTML document. + /// The node within which to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The sanitized HTML document. + public IHtmlDocument SanitizeDom(IHtmlDocument document, IHtmlElement? context = null, string baseUrl = "") + { + DoSanitize(document, context ?? (IParentNode)document, baseUrl); - return document; - } + return document; + } - /// - /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned. - /// - /// The HTML document to sanitize. - /// The base URL relative URLs are resolved against. No resolution if empty. - /// The formatter used to render the DOM. Using the if null. - /// The sanitized HTML document. - public string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null) - { - var parser = HtmlParserFactory(); - using var dom = parser.ParseDocument(html); + /// + /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned. + /// + /// The HTML document to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The formatter used to render the DOM. Using the if null. + /// The sanitized HTML document. + public string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null) + { + var parser = HtmlParserFactory(); + using var dom = parser.ParseDocument(html); - DoSanitize(dom, dom, baseUrl); + DoSanitize(dom, dom, baseUrl); - var output = dom.ToHtml(outputFormatter ?? OutputFormatter); + var output = dom.ToHtml(outputFormatter ?? OutputFormatter); - return output; - } + return output; + } - /// - /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned. - /// - /// The HTML document to sanitize. - /// The base URL relative URLs are resolved against. No resolution if empty. - /// The formatter used to render the DOM. Using the if null. - /// The sanitized HTML document. - public string SanitizeDocument(Stream html, string baseUrl = "", IMarkupFormatter? outputFormatter = null) - { - var parser = HtmlParserFactory(); - using var dom = parser.ParseDocument(html); + /// + /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned. + /// + /// The HTML document to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The formatter used to render the DOM. Using the if null. + /// The sanitized HTML document. + public string SanitizeDocument(Stream html, string baseUrl = "", IMarkupFormatter? outputFormatter = null) + { + var parser = HtmlParserFactory(); + using var dom = parser.ParseDocument(html); - DoSanitize(dom, dom, baseUrl); + DoSanitize(dom, dom, baseUrl); - var output = dom.ToHtml(outputFormatter ?? OutputFormatter); + var output = dom.ToHtml(outputFormatter ?? OutputFormatter); - return output; - } + return output; + } - /// - /// Removes all comment nodes from a list of nodes. - /// - /// The node within which to remove comments. - /// true if any comments were removed; otherwise, false. - private void RemoveComments(INode context) + /// + /// Removes all comment nodes from a list of nodes. + /// + /// The node within which to remove comments. + /// true if any comments were removed; otherwise, false. + private void RemoveComments(INode context) + { + foreach (var comment in GetAllNodes(context).OfType().ToList()) { - foreach (var comment in GetAllNodes(context).OfType().ToList()) - { - EncodeComment(comment); + EncodeComment(comment); - var e = new RemovingCommentEventArgs(comment); - OnRemovingComment(e); + var e = new RemovingCommentEventArgs(comment); + OnRemovingComment(e); - if (!e.Cancel) - comment.Remove(); - } + if (!e.Cancel) + comment.Remove(); } + } - private static void DefaultEncodeComment(IComment comment) + private static void DefaultEncodeComment(IComment comment) + { + var escapedText = comment.TextContent.Replace("<", "<").Replace(">", ">"); + if (escapedText != comment.TextContent) + comment.TextContent = escapedText; + } + + private static void DefaultEncodeLiteralTextElementContent(IElement tag) + { + var escapedHtml = tag.InnerHtml.Replace("<", "<").Replace(">", ">"); + if (escapedHtml != tag.InnerHtml) + tag.InnerHtml = escapedHtml; + if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript + tag.SetInnerText(escapedHtml); + } + + private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl = "") + { + // remove disallowed tags + foreach (var tag in context.QuerySelectorAll("*").Where(t => !IsAllowedTag(t)).ToList()) { - var escapedText = comment.TextContent.Replace("<", "<").Replace(">", ">"); - if (escapedText != comment.TextContent) - comment.TextContent = escapedText; + RemoveTag(tag, RemoveReason.NotAllowedTag); } - private static void DefaultEncodeLiteralTextElementContent(IElement tag) + // always encode text in raw data content + foreach (var tag in context.QuerySelectorAll("*") + .Where(t => t is not IHtmlStyleElement + && t.Flags.HasFlag(NodeFlags.LiteralText) + && !string.IsNullOrWhiteSpace(t.InnerHtml))) { - var escapedHtml = tag.InnerHtml.Replace("<", "<").Replace(">", ">"); - if (escapedHtml != tag.InnerHtml) - tag.InnerHtml = escapedHtml; - if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript - tag.SetInnerText(escapedHtml); + EncodeLiteralTextElementContent(tag); } - private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl = "") + SanitizeStyleSheets(dom, baseUrl); + + // cleanup attributes + foreach (var tag in context.QuerySelectorAll("*").ToList()) { - // remove disallowed tags - foreach (var tag in context.QuerySelectorAll("*").Where(t => !IsAllowedTag(t)).ToList()) + // remove disallowed attributes + foreach (var attribute in tag.Attributes.Where(a => !IsAllowedAttribute(a)).ToList()) { - RemoveTag(tag, RemoveReason.NotAllowedTag); + RemoveAttribute(tag, attribute, RemoveReason.NotAllowedAttribute); } - // always encode text in raw data content - foreach (var tag in context.QuerySelectorAll("*") - .Where(t => t is not IHtmlStyleElement - && t.Flags.HasFlag(NodeFlags.LiteralText) - && !string.IsNullOrWhiteSpace(t.InnerHtml))) + // sanitize URLs in URL-marked attributes + foreach (var attribute in tag.Attributes.Where(IsUriAttribute).ToList()) { - EncodeLiteralTextElementContent(tag); + var url = SanitizeUrl(tag, attribute.Value, baseUrl); + + if (url == null) + RemoveAttribute(tag, attribute, RemoveReason.NotAllowedUrlValue); + else + tag.SetAttribute(attribute.Name, url); } - SanitizeStyleSheets(dom, baseUrl); + // sanitize the style attribute + var oldStyleEmpty = string.IsNullOrEmpty(tag.GetAttribute(StyleAttributeName)); + SanitizeStyle(tag, baseUrl); - // cleanup attributes - foreach (var tag in context.QuerySelectorAll("*").ToList()) + // sanitize the value of the attributes + foreach (var attribute in tag.Attributes.ToList()) { - // remove disallowed attributes - foreach (var attribute in tag.Attributes.Where(a => !IsAllowedAttribute(a)).ToList()) + // The '& Javascript include' is a possible method to execute Javascript and can lead to XSS. + // (see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#.26_JavaScript_includes) + if (attribute.Value.Contains("&{")) { - RemoveAttribute(tag, attribute, RemoveReason.NotAllowedAttribute); + RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue); } - - // sanitize URLs in URL-marked attributes - foreach (var attribute in tag.Attributes.Where(IsUriAttribute).ToList()) + else { - var url = SanitizeUrl(tag, attribute.Value, baseUrl); - - if (url == null) - RemoveAttribute(tag, attribute, RemoveReason.NotAllowedUrlValue); - else - tag.SetAttribute(attribute.Name, url); - } + if (AllowedClasses.Any() && attribute.Name == "class") + { + var removedClasses = tag.ClassList.Except(AllowedClasses).ToArray(); - // sanitize the style attribute - var oldStyleEmpty = string.IsNullOrEmpty(tag.GetAttribute(StyleAttributeName)); - SanitizeStyle(tag, baseUrl); + foreach (var removedClass in removedClasses) + RemoveCssClass(tag, removedClass, RemoveReason.NotAllowedCssClass); - // sanitize the value of the attributes - foreach (var attribute in tag.Attributes.ToList()) - { - // The '& Javascript include' is a possible method to execute Javascript and can lead to XSS. - // (see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#.26_JavaScript_includes) - if (attribute.Value.Contains("&{")) - { - RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue); + if (!tag.ClassList.Any()) + RemoveAttribute(tag, attribute, RemoveReason.ClassAttributeEmpty); } - else + else if (!oldStyleEmpty && attribute.Name == StyleAttributeName && string.IsNullOrEmpty(attribute.Value)) { - if (AllowedClasses.Any() && attribute.Name == "class") - { - var removedClasses = tag.ClassList.Except(AllowedClasses).ToArray(); - - foreach (var removedClass in removedClasses) - RemoveCssClass(tag, removedClass, RemoveReason.NotAllowedCssClass); - - if (!tag.ClassList.Any()) - RemoveAttribute(tag, attribute, RemoveReason.ClassAttributeEmpty); - } - else if (!oldStyleEmpty && attribute.Name == StyleAttributeName && string.IsNullOrEmpty(attribute.Value)) - { - RemoveAttribute(tag, attribute, RemoveReason.StyleAttributeEmpty); - } + RemoveAttribute(tag, attribute, RemoveReason.StyleAttributeEmpty); } } } + } - if (context is INode node) + if (context is INode node) + { + RemoveComments(node); + } + + DoPostProcess(dom, context as INode); + } + + private void SanitizeStyleSheets(IHtmlDocument dom, string baseUrl) + { + foreach (var styleSheet in dom.StyleSheets.OfType()) + { + var styleTag = styleSheet.OwnerNode; + var i = 0; + + while (i < styleSheet.Rules.Length) { - RemoveComments(node); + var rule = styleSheet.Rules[i]; + if (!SanitizeStyleRule(rule, styleTag, baseUrl) && RemoveAtRule(styleTag, rule)) + styleSheet.RemoveAt(i); + else i++; } - DoPostProcess(dom, context as INode); + styleTag.InnerHtml = styleSheet.ToCss(StyleFormatter).Replace("<", "\\3c "); } + } + + private bool SanitizeStyleRule(ICssRule rule, IElement styleTag, string baseUrl) + { + if (!AllowedAtRules.Contains(rule.Type)) return false; - private void SanitizeStyleSheets(IHtmlDocument dom, string baseUrl) + if (rule is ICssStyleRule styleRule) + { + SanitizeStyleDeclaration(styleTag, styleRule.Style, baseUrl); + } + else { - foreach (var styleSheet in dom.StyleSheets.OfType()) + if (rule is ICssGroupingRule groupingRule) { - var styleTag = styleSheet.OwnerNode; var i = 0; - while (i < styleSheet.Rules.Length) + while (i < groupingRule.Rules.Length) { - var rule = styleSheet.Rules[i]; - if (!SanitizeStyleRule(rule, styleTag, baseUrl) && RemoveAtRule(styleTag, rule)) - styleSheet.RemoveAt(i); + var childRule = groupingRule.Rules[i]; + if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule)) + groupingRule.RemoveAt(i); else i++; } - - styleTag.InnerHtml = styleSheet.ToCss(StyleFormatter).Replace("<", "\\3c "); } - } - - private bool SanitizeStyleRule(ICssRule rule, IElement styleTag, string baseUrl) - { - if (!AllowedAtRules.Contains(rule.Type)) return false; - - if (rule is ICssStyleRule styleRule) + else if (rule is ICssPageRule pageRule) { - SanitizeStyleDeclaration(styleTag, styleRule.Style, baseUrl); + SanitizeStyleDeclaration(styleTag, pageRule.Style, baseUrl); } - else + else if (rule is ICssKeyframesRule keyFramesRule) { - if (rule is ICssGroupingRule groupingRule) - { - var i = 0; - - while (i < groupingRule.Rules.Length) - { - var childRule = groupingRule.Rules[i]; - if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule)) - groupingRule.RemoveAt(i); - else i++; - } - } - else if (rule is ICssPageRule pageRule) - { - SanitizeStyleDeclaration(styleTag, pageRule.Style, baseUrl); - } - else if (rule is ICssKeyframesRule keyFramesRule) + foreach (var childRule in keyFramesRule.Rules.OfType().ToList()) { - foreach (var childRule in keyFramesRule.Rules.OfType().ToList()) - { - if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule)) - keyFramesRule.Remove(childRule.KeyText); - } - } - else if (rule is ICssKeyframeRule keyFrameRule) - { - SanitizeStyleDeclaration(styleTag, keyFrameRule.Style, baseUrl); + if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule)) + keyFramesRule.Remove(childRule.KeyText); } } - - return true; + else if (rule is ICssKeyframeRule keyFrameRule) + { + SanitizeStyleDeclaration(styleTag, keyFrameRule.Style, baseUrl); + } } - /// - /// Performs post processing on all nodes in the document. - /// - /// The HTML document. - /// The node within which to post process all nodes. - private void DoPostProcess(IHtmlDocument dom, INode? context) + return true; + } + + /// + /// Performs post processing on all nodes in the document. + /// + /// The HTML document. + /// The node within which to post process all nodes. + private void DoPostProcess(IHtmlDocument dom, INode? context) + { + if (PostProcessNode != null) { - if (PostProcessNode != null) - { - dom.Normalize(); + dom.Normalize(); - if (context != null) + if (context != null) + { + var nodes = GetAllNodes(context).ToList(); + foreach (var node in nodes) { - var nodes = GetAllNodes(context).ToList(); - foreach (var node in nodes) + var e = new PostProcessNodeEventArgs(dom, node); + OnPostProcessNode(e); + if (e.ReplacementNodes.Any()) { - var e = new PostProcessNodeEventArgs(dom, node); - OnPostProcessNode(e); - if (e.ReplacementNodes.Any()) - { - ((IChildNode)node).Replace([.. e.ReplacementNodes]); - } + ((IChildNode)node).Replace([.. e.ReplacementNodes]); } } } - - if (PostProcessDom != null) - { - var e = new PostProcessDomEventArgs(dom); - OnPostProcessDom(e); - } } - /// - /// Determines whether the specified attribute can contain a URI. - /// - /// The attribute. - /// true if the attribute can contain a URI; otherwise, false. - private bool IsUriAttribute(IAttr attribute) + if (PostProcessDom != null) { - return UriAttributes.Contains(attribute.Name); + var e = new PostProcessDomEventArgs(dom); + OnPostProcessDom(e); } + } - /// - /// Determines whether the specified tag is allowed. - /// - /// The tag. - /// true if the tag is allowed; otherwise, false. - private bool IsAllowedTag(IElement tag) - { - return AllowedTags.Contains(tag.NodeName); - } + /// + /// Determines whether the specified attribute can contain a URI. + /// + /// The attribute. + /// true if the attribute can contain a URI; otherwise, false. + private bool IsUriAttribute(IAttr attribute) + { + return UriAttributes.Contains(attribute.Name); + } - /// - /// Determines whether the specified attribute is allowed. - /// - /// The attribute. - /// true if the attribute is allowed; otherwise, false. - private bool IsAllowedAttribute(IAttr attribute) - { - return AllowedAttributes.Contains(attribute.Name) - // test html5 data- attributes - || (AllowDataAttributes && attribute.Name != null && attribute.Name.StartsWith("data-", StringComparison.OrdinalIgnoreCase)); - } + /// + /// Determines whether the specified tag is allowed. + /// + /// The tag. + /// true if the tag is allowed; otherwise, false. + private bool IsAllowedTag(IElement tag) + { + return AllowedTags.Contains(tag.NodeName); + } + + /// + /// Determines whether the specified attribute is allowed. + /// + /// The attribute. + /// true if the attribute is allowed; otherwise, false. + private bool IsAllowedAttribute(IAttr attribute) + { + return AllowedAttributes.Contains(attribute.Name) + // test html5 data- attributes + || (AllowDataAttributes && attribute.Name != null && attribute.Name.StartsWith("data-", StringComparison.OrdinalIgnoreCase)); + } + + /// + /// Sanitizes the style. + /// + /// The element. + /// The base URL. + protected void SanitizeStyle(IElement element, string baseUrl) + { + // filter out invalid CSS declarations + // see https://github.com/AngleSharp/AngleSharp/issues/101 + var attribute = element.GetAttribute(StyleAttributeName); + if (attribute == null) + return; - /// - /// Sanitizes the style. - /// - /// The element. - /// The base URL. - protected void SanitizeStyle(IElement element, string baseUrl) + if (element.GetStyle() == null) { - // filter out invalid CSS declarations - // see https://github.com/AngleSharp/AngleSharp/issues/101 - var attribute = element.GetAttribute(StyleAttributeName); - if (attribute == null) - return; + element.RemoveAttribute(StyleAttributeName); + return; + } - if (element.GetStyle() == null) - { - element.RemoveAttribute(StyleAttributeName); - return; - } + element.SetAttribute(StyleAttributeName, element.GetStyle().ToCss(StyleFormatter)); - element.SetAttribute(StyleAttributeName, element.GetStyle().ToCss(StyleFormatter)); + var styles = element.GetStyle(); + if (styles == null || styles.Length == 0) + return; - var styles = element.GetStyle(); - if (styles == null || styles.Length == 0) - return; + SanitizeStyleDeclaration(element, styles, baseUrl); + } - SanitizeStyleDeclaration(element, styles, baseUrl); - } + /// + /// Verify if the given CSS property name is allowed. By default this will + /// check if the property is in the set, + /// or if the property is a custom property and is true. + /// + /// The name of the CSS property. + /// True if the property is allowed or not. + protected virtual bool IsAllowedCssProperty(string propertyName) + { + return AllowedCssProperties.Contains(propertyName) + || AllowCssCustomProperties && propertyName != null && propertyName.StartsWith("--"); + } - /// - /// Verify if the given CSS property name is allowed. By default this will - /// check if the property is in the set, - /// or if the property is a custom property and is true. - /// - /// The name of the CSS property. - /// True if the property is allowed or not. - protected virtual bool IsAllowedCssProperty(string propertyName) - { - return AllowedCssProperties.Contains(propertyName) - || AllowCssCustomProperties && propertyName != null && propertyName.StartsWith("--"); - } + private void SanitizeStyleDeclaration(IElement element, ICssStyleDeclaration styles, string baseUrl) + { + var removeStyles = new List>(); + var setStyles = new Dictionary(); - private void SanitizeStyleDeclaration(IElement element, ICssStyleDeclaration styles, string baseUrl) + foreach (var style in styles) { - var removeStyles = new List>(); - var setStyles = new Dictionary(); + var key = DecodeCss(style.Name); + var val = DecodeCss(style.Value); - foreach (var style in styles) + if (!IsAllowedCssProperty(key)) { - var key = DecodeCss(style.Name); - var val = DecodeCss(style.Value); - - if (!IsAllowedCssProperty(key)) - { - removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedStyle)); - continue; - } + removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedStyle)); + continue; + } - if (CssExpression.IsMatch(val) || DisallowCssPropertyValue.IsMatch(val)) - { - removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedValue)); - continue; - } + if (CssExpression.IsMatch(val) || DisallowCssPropertyValue.IsMatch(val)) + { + removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedValue)); + continue; + } - val = WhitespaceRegex.Replace(val, string.Empty); + val = WhitespaceRegex.Replace(val, string.Empty); - var urls = CssUrl.Matches(val).Cast().Select(m => (Match: m, Url: SanitizeUrl(element, m.Groups[2].Value, baseUrl))); + var urls = CssUrl.Matches(val).Cast().Select(m => (Match: m, Url: SanitizeUrl(element, m.Groups[2].Value, baseUrl))); - if (urls.Any()) + if (urls.Any()) + { + if (urls.Any(u => u.Url == null)) + removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedUrlValue)); + else { - if (urls.Any(u => u.Url == null)) - removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedUrlValue)); - else - { - var sb = new StringBuilder(); - var ix = 0; + var sb = new StringBuilder(); + var ix = 0; - foreach (var url in urls) - { - sb.Append(val, ix, url.Match.Index - ix); - sb.Append("url("); - sb.Append(url.Match.Groups[1].Value); - sb.Append(url.Url); - sb.Append(url.Match.Groups[3].Value); - ix = url.Match.Index + url.Match.Length; - } + foreach (var url in urls) + { + sb.Append(val, ix, url.Match.Index - ix); + sb.Append("url("); + sb.Append(url.Match.Groups[1].Value); + sb.Append(url.Url); + sb.Append(url.Match.Groups[3].Value); + ix = url.Match.Index + url.Match.Length; + } - sb.Append(val, ix, val.Length - ix); + sb.Append(val, ix, val.Length - ix); - var s = sb.ToString(); + var s = sb.ToString(); - if (s != val) + if (s != val) + { + if (key != style.Name) { - if (key != style.Name) - { - removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedUrlValue)); - } - setStyles[key] = s; + removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedUrlValue)); } + setStyles[key] = s; } } } + } - foreach (var style in setStyles) - { - styles.SetProperty(style.Key, style.Value); - } - - foreach (var style in removeStyles) - { - RemoveStyle(element, styles, style.Item1, style.Item2); - } + foreach (var style in setStyles) + { + styles.SetProperty(style.Key, style.Value); } - /// - /// Decodes CSS Unicode escapes and removes comments. - /// - /// The CSS string. - /// The decoded CSS string. - protected static string DecodeCss(string css) + foreach (var style in removeStyles) { - var r = CssUnicodeEscapes.Replace(css, m => - { - if (m.Groups[1].Success) - return ((char)int.Parse(m.Groups[1].Value, NumberStyles.HexNumber)).ToString(); - var t = m.Groups[2].Value; - return t == "\\" ? @"\\" : t; - }); + RemoveStyle(element, styles, style.Item1, style.Item2); + } + } - r = CssComments.Replace(r, m => ""); + /// + /// Decodes CSS Unicode escapes and removes comments. + /// + /// The CSS string. + /// The decoded CSS string. + protected static string DecodeCss(string css) + { + var r = CssUnicodeEscapes.Replace(css, m => + { + if (m.Groups[1].Success) + return ((char)int.Parse(m.Groups[1].Value, NumberStyles.HexNumber)).ToString(); + var t = m.Groups[2].Value; + return t == "\\" ? @"\\" : t; + }); - return r; - } + r = CssComments.Replace(r, m => ""); - private static readonly Regex SchemeRegex = new(@"^([^\/#]*?)(?:\:|�*58|�*3a)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + return r; + } - /// - /// Tries to create a safe object from a string. - /// - /// The URL. - /// The object or null if no safe can be created. - protected Iri? GetSafeIri(string url) - { - url = url.TrimStart(); + private static readonly Regex SchemeRegex = new(@"^([^\/#]*?)(?:\:|�*58|�*3a)", RegexOptions.Compiled | RegexOptions.IgnoreCase); - var schemeMatch = SchemeRegex.Match(url); + /// + /// Tries to create a safe object from a string. + /// + /// The URL. + /// The object or null if no safe can be created. + protected Iri? GetSafeIri(string url) + { + url = url.TrimStart(); - if (schemeMatch.Success) - { - var scheme = schemeMatch.Groups[1].Value; - return AllowedSchemes.Contains(scheme, StringComparer.OrdinalIgnoreCase) ? new Iri(url, scheme) : null; - } + var schemeMatch = SchemeRegex.Match(url); - return new Iri(url); + if (schemeMatch.Success) + { + var scheme = schemeMatch.Groups[1].Value; + return AllowedSchemes.Contains(scheme, StringComparer.OrdinalIgnoreCase) ? new Iri(url, scheme) : null; } - /// - /// Sanitizes a URL. - /// - /// The tag containing the URL being sanitized. - /// The URL. - /// The base URL relative URLs are resolved against (empty or null for no resolution). - /// The sanitized URL or null if no safe URL can be created. - protected virtual string? SanitizeUrl(IElement element, string url, string baseUrl) - { - var iri = GetSafeIri(url); + return new Iri(url); + } + + /// + /// Sanitizes a URL. + /// + /// The tag containing the URL being sanitized. + /// The URL. + /// The base URL relative URLs are resolved against (empty or null for no resolution). + /// The sanitized URL or null if no safe URL can be created. + protected virtual string? SanitizeUrl(IElement element, string url, string baseUrl) + { + var iri = GetSafeIri(url); - if (iri != null && !iri.IsAbsolute && !string.IsNullOrEmpty(baseUrl)) + if (iri != null && !iri.IsAbsolute && !string.IsNullOrEmpty(baseUrl)) + { + // resolve relative URI + if (Uri.TryCreate(baseUrl, UriKind.Absolute, out Uri baseUri)) { - // resolve relative URI - if (Uri.TryCreate(baseUrl, UriKind.Absolute, out Uri baseUri)) + try { - try - { - var sanitizedUrl = new Uri(baseUri, iri.Value).AbsoluteUri; - var ev = new FilterUrlEventArgs(element, url, sanitizedUrl); + var sanitizedUrl = new Uri(baseUri, iri.Value).AbsoluteUri; + var ev = new FilterUrlEventArgs(element, url, sanitizedUrl); - OnFilteringUrl(ev); + OnFilteringUrl(ev); - return ev.SanitizedUrl; - } - catch (UriFormatException) - { - iri = null; - } + return ev.SanitizedUrl; + } + catch (UriFormatException) + { + iri = null; } - else iri = null; } + else iri = null; + } - var e = new FilterUrlEventArgs(element, url, iri?.Value); - OnFilteringUrl(e); + var e = new FilterUrlEventArgs(element, url, iri?.Value); + OnFilteringUrl(e); - return e.SanitizedUrl; - } + return e.SanitizedUrl; + } - /// - /// Removes a tag from the document. - /// - /// Tag to be removed. - /// Reason for removal. - private void RemoveTag(IElement tag, RemoveReason reason) - { - var e = new RemovingTagEventArgs(tag, reason); - OnRemovingTag(e); + /// + /// Removes a tag from the document. + /// + /// Tag to be removed. + /// Reason for removal. + private void RemoveTag(IElement tag, RemoveReason reason) + { + var e = new RemovingTagEventArgs(tag, reason); + OnRemovingTag(e); - if (!e.Cancel) - { - if (KeepChildNodes && tag.HasChildNodes) - tag.Replace([.. tag.ChildNodes]); - else - tag.Remove(); - } + if (!e.Cancel) + { + if (KeepChildNodes && tag.HasChildNodes) + tag.Replace([.. tag.ChildNodes]); + else + tag.Remove(); } + } - /// - /// Removes an attribute from the document. - /// - /// Tag the attribute belongs to. - /// Attribute to be removed. - /// Reason for removal. - private void RemoveAttribute(IElement tag, IAttr attribute, RemoveReason reason) - { - var e = new RemovingAttributeEventArgs(tag, attribute, reason); - OnRemovingAttribute(e); + /// + /// Removes an attribute from the document. + /// + /// Tag the attribute belongs to. + /// Attribute to be removed. + /// Reason for removal. + private void RemoveAttribute(IElement tag, IAttr attribute, RemoveReason reason) + { + var e = new RemovingAttributeEventArgs(tag, attribute, reason); + OnRemovingAttribute(e); - if (!e.Cancel) - tag.RemoveAttribute(attribute.Name); - } + if (!e.Cancel) + tag.RemoveAttribute(attribute.Name); + } - /// - /// Removes a style from the document. - /// - /// Tag the style belongs to. - /// Style rule that contains the style to be removed. - /// Style to be removed. - /// Reason for removal. - private void RemoveStyle(IElement tag, ICssStyleDeclaration styles, ICssProperty style, RemoveReason reason) - { - var e = new RemovingStyleEventArgs(tag, style, reason); - OnRemovingStyle(e); + /// + /// Removes a style from the document. + /// + /// Tag the style belongs to. + /// Style rule that contains the style to be removed. + /// Style to be removed. + /// Reason for removal. + private void RemoveStyle(IElement tag, ICssStyleDeclaration styles, ICssProperty style, RemoveReason reason) + { + var e = new RemovingStyleEventArgs(tag, style, reason); + OnRemovingStyle(e); - if (!e.Cancel) - styles.RemoveProperty(style.Name); - } + if (!e.Cancel) + styles.RemoveProperty(style.Name); + } - /// - /// Removes an at-rule from the document. - /// - /// Tag the style belongs to. - /// Rule to be removed. - /// true, if the rule can be removed; false, otherwise. - private bool RemoveAtRule(IElement tag, ICssRule rule) - { - var e = new RemovingAtRuleEventArgs(tag, rule); - OnRemovingAtRule(e); + /// + /// Removes an at-rule from the document. + /// + /// Tag the style belongs to. + /// Rule to be removed. + /// true, if the rule can be removed; false, otherwise. + private bool RemoveAtRule(IElement tag, ICssRule rule) + { + var e = new RemovingAtRuleEventArgs(tag, rule); + OnRemovingAtRule(e); - return !e.Cancel; - } + return !e.Cancel; + } - /// - /// Removes a CSS class from a class attribute. - /// - /// Tag the style belongs to. - /// Class to be removed. - /// Reason for removal. - private void RemoveCssClass(IElement tag, string cssClass, RemoveReason reason) - { - var e = new RemovingCssClassEventArgs(tag, cssClass, reason); - OnRemovingCssClass(e); + /// + /// Removes a CSS class from a class attribute. + /// + /// Tag the style belongs to. + /// Class to be removed. + /// Reason for removal. + private void RemoveCssClass(IElement tag, string cssClass, RemoveReason reason) + { + var e = new RemovingCssClassEventArgs(tag, cssClass, reason); + OnRemovingCssClass(e); - if (!e.Cancel) - tag.ClassList.Remove(cssClass); - } + if (!e.Cancel) + tag.ClassList.Remove(cssClass); } } diff --git a/src/HtmlSanitizer/HtmlSanitizerDefaults.cs b/src/HtmlSanitizer/HtmlSanitizerDefaults.cs index 90ee3738..084f8c18 100644 --- a/src/HtmlSanitizer/HtmlSanitizerDefaults.cs +++ b/src/HtmlSanitizer/HtmlSanitizerDefaults.cs @@ -3,362 +3,361 @@ using System.Collections.Generic; using System.Collections.Immutable; -namespace Ganss.Xss +namespace Ganss.Xss; + +/// +/// Default options. +/// +public static class HtmlSanitizerDefaults { /// - /// Default options. + /// The default allowed CSS at-rules. /// - public static class HtmlSanitizerDefaults + public static ISet AllowedAtRules { get; } = new HashSet() { - /// - /// The default allowed CSS at-rules. - /// - public static ISet AllowedAtRules { get; } = new HashSet() - { - CssRuleType.Style, CssRuleType.Namespace - }.ToImmutableHashSet(); + CssRuleType.Style, CssRuleType.Namespace + }.ToImmutableHashSet(); - /// - /// The default allowed URI schemes. - /// - public static ISet AllowedSchemes { get; } = new HashSet(StringComparer.OrdinalIgnoreCase) - { - "http", "https" - }.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase); + /// + /// The default allowed URI schemes. + /// + public static ISet AllowedSchemes { get; } = new HashSet(StringComparer.OrdinalIgnoreCase) + { + "http", "https" + }.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase); - /// - /// The default allowed HTML tag names. - /// - public static ISet AllowedTags { get; } = new HashSet(StringComparer.OrdinalIgnoreCase) - { - // https://developer.mozilla.org/en/docs/Web/Guide/HTML/HTML5/HTML5_element_list - "a", "abbr", "acronym", "address", "area", "b", - "big", "blockquote", "br", "button", "caption", "center", "cite", - "code", "col", "colgroup", "dd", "del", "dfn", "dir", "div", "dl", "dt", - "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", - "hr", "i", "img", "input", "ins", "kbd", "label", "legend", "li", "map", - "menu", "ol", "optgroup", "option", "p", "pre", "q", "s", "samp", - "select", "small", "span", "strike", "strong", "sub", "sup", "table", - "tbody", "td", "textarea", "tfoot", "th", "thead", "tr", "tt", "u", - "ul", "var", - // HTML5 - // Sections - "section", "nav", "article", "aside", "header", "footer", "main", - // Grouping content - "figure", "figcaption", - // Text-level semantics - "data", "time", "mark", "ruby", "rt", "rp", "bdi", "wbr", - // Forms - "datalist", "keygen", "output", "progress", "meter", - // Interactive elements - "details", "summary", "menuitem", - // document elements - "html", "head", "body" - }.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase); + /// + /// The default allowed HTML tag names. + /// + public static ISet AllowedTags { get; } = new HashSet(StringComparer.OrdinalIgnoreCase) + { + // https://developer.mozilla.org/en/docs/Web/Guide/HTML/HTML5/HTML5_element_list + "a", "abbr", "acronym", "address", "area", "b", + "big", "blockquote", "br", "button", "caption", "center", "cite", + "code", "col", "colgroup", "dd", "del", "dfn", "dir", "div", "dl", "dt", + "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6", + "hr", "i", "img", "input", "ins", "kbd", "label", "legend", "li", "map", + "menu", "ol", "optgroup", "option", "p", "pre", "q", "s", "samp", + "select", "small", "span", "strike", "strong", "sub", "sup", "table", + "tbody", "td", "textarea", "tfoot", "th", "thead", "tr", "tt", "u", + "ul", "var", + // HTML5 + // Sections + "section", "nav", "article", "aside", "header", "footer", "main", + // Grouping content + "figure", "figcaption", + // Text-level semantics + "data", "time", "mark", "ruby", "rt", "rp", "bdi", "wbr", + // Forms + "datalist", "keygen", "output", "progress", "meter", + // Interactive elements + "details", "summary", "menuitem", + // document elements + "html", "head", "body" + }.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase); - /// - /// The default allowed HTML attributes. - /// - public static ISet AllowedAttributes { get; } = new HashSet(StringComparer.OrdinalIgnoreCase) - { - // https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes - "abbr", "accept", "accept-charset", "accesskey", - "action", "align", "alt", "axis", "bgcolor", "border", "cellpadding", - "cellspacing", "char", "charoff", "charset", "checked", "cite", /* "class", */ - "clear", "cols", "colspan", "color", "compact", "coords", "datetime", - "dir", "disabled", "enctype", "for", "frame", "headers", "height", - "href", "hreflang", "hspace", /* "id", */ "ismap", "label", "lang", - "longdesc", "maxlength", "media", "method", "multiple", "name", - "nohref", "noshade", "nowrap", "prompt", "readonly", "rel", "rev", - "rows", "rowspan", "rules", "scope", "selected", "shape", "size", - "span", "src", "start", "style", "summary", "tabindex", "target", "title", - "type", "usemap", "valign", "value", "vspace", "width", - // HTML5 - "high", // - "keytype", // - "list", // - "low", // - "max", // - "min", // - "novalidate", //
- "open", //
- "optimum", // - "pattern", // - "placeholder", //