diff --git a/src/HtmlSanitizer/HtmlFormatter.cs b/src/HtmlSanitizer/HtmlFormatter.cs
new file mode 100644
index 0000000..76551f7
--- /dev/null
+++ b/src/HtmlSanitizer/HtmlFormatter.cs
@@ -0,0 +1,149 @@
+using AngleSharp;
+using AngleSharp.Html;
+using AngleSharp.Dom;
+using AngleSharp.Extensions;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Ganss.XSS
+{
+    /// <summary>
+    /// HTML5 markup formatter. Identical to <see cref="HtmlMarkupFormatter"/> except for &lt; and &gt; which are
+    /// encoded in attribute values.
+    /// </summary>
+    public class HtmlFormatter: IMarkupFormatter
+    {
+        /// <summary>
+        /// An instance of <see cref="HtmlFormatter"/>.
+        /// </summary>
+        public static readonly HtmlFormatter Instance = new HtmlFormatter();
+
+        // disable XML comments warnings
+        #pragma warning disable 1591
+
+        /// <summary>
+        /// Serializes an attribute as <c>name="value"</c>. In the value, &amp;, the non-breaking space,
+        /// the double quote, &lt; and &gt; are replaced by character references.
+        /// </summary>
+        /// <param name="attr">The attribute to serialize.</param>
+        /// <returns>The markup for the attribute.</returns>
+        public string Attribute(IAttr attr)
+        {
+            var namespaceUri = attr.NamespaceUri;
+            var localName = attr.LocalName;
+            var value = attr.Value;
+            var temp = new StringBuilder();
+
+            // Choose the serialized name: the xml, xlink and xmlns namespaces get a fixed prefix.
+            if (String.IsNullOrEmpty(namespaceUri))
+            {
+                temp.Append(localName);
+            }
+            else if (namespaceUri == NamespaceNames.XmlUri)
+            {
+                temp.Append(NamespaceNames.XmlPrefix).Append(':').Append(localName);
+            }
+            else if (namespaceUri == NamespaceNames.XLinkUri)
+            {
+                temp.Append(NamespaceNames.XLinkPrefix).Append(':').Append(localName);
+            }
+            else if (namespaceUri == NamespaceNames.XmlNsUri)
+            {
+                temp.Append(XmlNamespaceLocalName(localName));
+            }
+            else
+            {
+                temp.Append(attr.Name);
+            }
+
+            temp.Append('=').Append('"');
+
+            foreach (var c in value)
+            {
+                switch (c)
+                {
+                    case '&': temp.Append("&amp;"); break;
+                    case '\u00a0': temp.Append("&nbsp;"); break;
+                    case '"': temp.Append("&quot;"); break;
+                    // Angle brackets are escaped too; this is the difference described in the class summary.
+                    case '<': temp.Append("&lt;"); break;
+                    case '>': temp.Append("&gt;"); break;
+                    default: temp.Append(c); break;
+                }
+            }
+
+            return temp.Append('"').ToString();
+        }
+
+        /// <summary>
+        /// Gets the serialized name of an attribute in the XMLNS namespace: a local name equal to the
+        /// XMLNS prefix is returned unchanged, any other local name gets the prefix and a colon prepended.
+        /// </summary>
+        static string XmlNamespaceLocalName(string name)
+        {
+            return name != NamespaceNames.XmlNsPrefix ? (NamespaceNames.XmlNsPrefix + ":" + name) : name;
+        }
+
+        /// <summary>Delegates to <see cref="HtmlMarkupFormatter"/>.</summary>
+        public string CloseTag(IElement element, bool selfClosing)
+        {
+            return HtmlMarkupFormatter.Instance.CloseTag(element, selfClosing);
+        }
+
+        /// <summary>Delegates to <see cref="HtmlMarkupFormatter"/>.</summary>
+        public string Comment(IComment comment)
+        {
+            return HtmlMarkupFormatter.Instance.Comment(comment);
+        }
+
+        /// <summary>Delegates to <see cref="HtmlMarkupFormatter"/>.</summary>
+        public string Doctype(IDocumentType doctype)
+        {
+            return HtmlMarkupFormatter.Instance.Doctype(doctype);
+        }
+
+        /// <summary>
+        /// Serializes the start tag of an element, formatting each attribute with <see cref="Attribute(IAttr)"/>.
+        /// The <paramref name="selfClosing"/> flag is not used; the tag always ends with a plain &gt;.
+        /// </summary>
+        public string OpenTag(IElement element, bool selfClosing)
+        {
+            var temp = new StringBuilder();
+
+            temp.Append('<');
+
+            if (!string.IsNullOrEmpty(element.Prefix))
+            {
+                temp.Append(element.Prefix).Append(':');
+            }
+
+            temp.Append(element.LocalName);
+
+            foreach (var attribute in element.Attributes)
+            {
+                temp.Append(' ').Append(Attribute(attribute));
+            }
+
+            temp.Append('>');
+
+            return temp.ToString();
+        }
+
+        /// <summary>Delegates to <see cref="HtmlMarkupFormatter"/>.</summary>
+        public string Processing(IProcessingInstruction processing)
+        {
+            return HtmlMarkupFormatter.Instance.Processing(processing);
+        }
+
+        /// <summary>Delegates to <see cref="HtmlMarkupFormatter"/>.</summary>
+        public string Text(string text)
+        {
+            return HtmlMarkupFormatter.Instance.Text(text);
+        }
+
+        #pragma warning restore 1591
+    }
+}
diff --git a/src/HtmlSanitizer/HtmlSanitizer.cs b/src/HtmlSanitizer/HtmlSanitizer.cs
index ca41742..f62adeb 100644
--- a/src/HtmlSanitizer/HtmlSanitizer.cs
+++ b/src/HtmlSanitizer/HtmlSanitizer.cs
@@ -81,9 +81,9 @@ public HtmlSanitizer(IEnumerable allowedTags = null, IEnumerable
         public Func HtmlParserFactory { get; set; } = DefaultHtmlParserFactory;
 
         ///
-        /// Gets or sets the default object used for generating output. Default is .
+        /// Gets or sets the default object used for generating output. Default is .
         ///
-        public static IMarkupFormatter DefaultOutputFormatter { get; set; } = HtmlMarkupFormatter.Instance;
+        public static IMarkupFormatter DefaultOutputFormatter { get; set; } = HtmlFormatter.Instance;
 
         ///
         /// Gets or sets the object used for generating output. Default is .
@@ -468,11 +468,7 @@ private void DoSanitize(IHtmlDocument dom, IElement context, string baseUrl = "" if (attribute.Value.Contains("&{")) RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue); else - { - // escape attribute value - var val = attribute.Value.Replace("<", "<").Replace(">", ">"); - tag.SetAttribute(attribute.Name, val); - } + tag.SetAttribute(attribute.Name, attribute.Value); } } diff --git a/src/HtmlSanitizer/HtmlSanitizer.csproj b/src/HtmlSanitizer/HtmlSanitizer.csproj index 0fbb577..b978d72 100644 --- a/src/HtmlSanitizer/HtmlSanitizer.csproj +++ b/src/HtmlSanitizer/HtmlSanitizer.csproj @@ -62,6 +62,7 @@ + diff --git a/test/HtmlSanitizer.Tests/Tests.cs b/test/HtmlSanitizer.Tests/Tests.cs index 68ee5bb..79af669 100644 --- a/test/HtmlSanitizer.Tests/Tests.cs +++ b/test/HtmlSanitizer.Tests/Tests.cs @@ -1808,7 +1808,7 @@ public void SanitizeEscapeAttrTest() { var sanitizer = new HtmlSanitizer(); var html = @"
"; - Assert.Equal(@"
", sanitizer.Sanitize(html), ignoreCase: true); + Assert.Equal(@"
", sanitizer.Sanitize(html), ignoreCase: true); } [Fact] @@ -2731,6 +2731,19 @@ public void NullStyleTest() Assert.Equal("

xyz

", actual); } + + [Fact] + public void EscapeEntityInAttributeValueTest() + { + // https://github.com/mganss/HtmlSanitizer/issues/84 + + var s = new HtmlSanitizer { HtmlParserFactory = () => new HtmlParser(new Configuration()) }; + var html = @""">"; + + var actual = s.Sanitize(html); + + Assert.Equal(@"", actual); + } } }