Skip to content

Commit

Permalink
Add HtmlFormatter.cs (fixes #84)
Browse files Browse the repository at this point in the history
  • Loading branch information
mganss committed Aug 22, 2016
1 parent bf1b96c commit 553b8dd
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 8 deletions.
128 changes: 128 additions & 0 deletions src/HtmlSanitizer/HtmlFormatter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
using AngleSharp;
using AngleSharp.Html;
using AngleSharp.Dom;
using AngleSharp.Extensions;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Ganss.XSS
{
/// <summary>
/// HTML5 markup formatter modelled on <see cref="HtmlMarkupFormatter"/>. Opening tags and attributes are
/// serialized here so that &lt; and &gt; are also encoded in attribute values; closing tags, comments,
/// doctypes, processing instructions and text are delegated to <see cref="HtmlMarkupFormatter.Instance"/>.
/// </summary>
public class HtmlFormatter : IMarkupFormatter
{
    /// <summary>
    /// An instance of <see cref="HtmlFormatter"/>.
    /// </summary>
    public static readonly HtmlFormatter Instance = new HtmlFormatter();

    /// <summary>
    /// Serializes an attribute as <c>name="value"</c>. In the value, &amp;, the non-breaking space (U+00A0),
    /// &quot;, &lt; and &gt; are replaced by their entities; every other character is written unchanged.
    /// </summary>
    /// <param name="attr">The attribute to serialize.</param>
    /// <returns>The attribute markup, with the value enclosed in double quotes.</returns>
    public string Attribute(IAttr attr)
    {
        var temp = new StringBuilder();

        AppendSerializedName(temp, attr);
        temp.Append('=').Append('"');

        // A missing value is written like an empty one rather than failing on the null reference.
        AppendEscapedValue(temp, attr.Value ?? string.Empty);

        return temp.Append('"').ToString();
    }

    // Writes the attribute's serialized name; the prefix is chosen from the attribute's namespace.
    static void AppendSerializedName(StringBuilder target, IAttr attr)
    {
        var namespaceUri = attr.NamespaceUri;
        var localName = attr.LocalName;

        if (string.IsNullOrEmpty(namespaceUri))
        {
            target.Append(localName);
        }
        else if (namespaceUri == NamespaceNames.XmlUri)
        {
            target.Append(NamespaceNames.XmlPrefix).Append(':').Append(localName);
        }
        else if (namespaceUri == NamespaceNames.XLinkUri)
        {
            target.Append(NamespaceNames.XLinkPrefix).Append(':').Append(localName);
        }
        else if (namespaceUri == NamespaceNames.XmlNsUri)
        {
            target.Append(XmlNamespaceLocalName(localName));
        }
        else
        {
            // Any other namespace: fall back to the attribute's qualified name.
            target.Append(attr.Name);
        }
    }

    // Appends the value with the characters that are significant inside a double-quoted attribute escaped.
    static void AppendEscapedValue(StringBuilder target, string value)
    {
        foreach (var c in value)
        {
            switch (c)
            {
                case '&': target.Append("&amp;"); break;
                case '\u00a0': target.Append("&nbsp;"); break;
                case '"': target.Append("&quot;"); break;
                case '<': target.Append("&lt;"); break;
                case '>': target.Append("&gt;"); break;
                default: target.Append(c); break;
            }
        }
    }

    // Serialized name of an attribute in the XMLNS namespace. A local name equal to
    // NamespaceNames.XmlNsPrefix is written as is; any other local name is written as that prefix,
    // a colon and the local name. The local name must be kept, otherwise a prefixed namespace
    // declaration would be emitted with an empty name after the colon.
    static string XmlNamespaceLocalName(string name)
    {
        return name != NamespaceNames.XmlNsPrefix ? NamespaceNames.XmlNsPrefix + ":" + name : name;
    }

    /// <summary>
    /// Serializes the closing tag of an element by delegating to <see cref="HtmlMarkupFormatter.Instance"/>.
    /// </summary>
    /// <param name="element">The element to close.</param>
    /// <param name="selfClosing">Passed through unchanged to the underlying formatter.</param>
    /// <returns>The markup produced by the underlying formatter.</returns>
    public string CloseTag(IElement element, bool selfClosing)
    {
        return HtmlMarkupFormatter.Instance.CloseTag(element, selfClosing);
    }

    /// <summary>
    /// Serializes a comment by delegating to <see cref="HtmlMarkupFormatter.Instance"/>.
    /// </summary>
    /// <param name="comment">The comment to serialize.</param>
    /// <returns>The markup produced by the underlying formatter.</returns>
    public string Comment(IComment comment)
    {
        return HtmlMarkupFormatter.Instance.Comment(comment);
    }

    /// <summary>
    /// Serializes a doctype by delegating to <see cref="HtmlMarkupFormatter.Instance"/>.
    /// </summary>
    /// <param name="doctype">The doctype to serialize.</param>
    /// <returns>The markup produced by the underlying formatter.</returns>
    public string Doctype(IDocumentType doctype)
    {
        return HtmlMarkupFormatter.Instance.Doctype(doctype);
    }

    /// <summary>
    /// Serializes the opening tag of an element: <c>&lt;</c>, the optional prefix and a colon, the local
    /// name, each attribute preceded by a space and formatted by <see cref="Attribute(IAttr)"/>, then <c>&gt;</c>.
    /// </summary>
    /// <param name="element">The element to open.</param>
    /// <param name="selfClosing">Not used by this implementation; the tag always ends with <c>&gt;</c>.</param>
    /// <returns>The opening tag markup.</returns>
    public string OpenTag(IElement element, bool selfClosing)
    {
        var temp = new StringBuilder();

        temp.Append('<');

        if (!string.IsNullOrEmpty(element.Prefix))
        {
            temp.Append(element.Prefix).Append(':');
        }

        temp.Append(element.LocalName);

        foreach (var attribute in element.Attributes)
        {
            temp.Append(' ').Append(Attribute(attribute));
        }

        temp.Append('>');

        return temp.ToString();
    }

    /// <summary>
    /// Serializes a processing instruction by delegating to <see cref="HtmlMarkupFormatter.Instance"/>.
    /// </summary>
    /// <param name="processing">The processing instruction to serialize.</param>
    /// <returns>The markup produced by the underlying formatter.</returns>
    public string Processing(IProcessingInstruction processing)
    {
        return HtmlMarkupFormatter.Instance.Processing(processing);
    }

    /// <summary>
    /// Serializes text content by delegating to <see cref="HtmlMarkupFormatter.Instance"/>.
    /// </summary>
    /// <param name="text">The text to serialize.</param>
    /// <returns>The markup produced by the underlying formatter.</returns>
    public string Text(string text)
    {
        return HtmlMarkupFormatter.Instance.Text(text);
    }
}
}
10 changes: 3 additions & 7 deletions src/HtmlSanitizer/HtmlSanitizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ public HtmlSanitizer(IEnumerable<string> allowedTags = null, IEnumerable<string>
public Func<HtmlParser> HtmlParserFactory { get; set; } = DefaultHtmlParserFactory;

/// <summary>
/// Gets or sets the default <see cref="IMarkupFormatter"/> object used for generating output. Default is <see cref="HtmlMarkupFormatter.Instance"/>.
/// Gets or sets the default <see cref="IMarkupFormatter"/> object used for generating output. Default is <see cref="HtmlFormatter.Instance"/>.
/// </summary>
public static IMarkupFormatter DefaultOutputFormatter { get; set; } = HtmlMarkupFormatter.Instance;
public static IMarkupFormatter DefaultOutputFormatter { get; set; } = HtmlFormatter.Instance;

/// <summary>
/// Gets or sets the <see cref="IMarkupFormatter"/> object used for generating output. Default is <see cref="DefaultOutputFormatter"/>.
Expand Down Expand Up @@ -468,11 +468,7 @@ private void DoSanitize(IHtmlDocument dom, IElement context, string baseUrl = ""
if (attribute.Value.Contains("&{"))
RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue);
else
{
// escape attribute value
var val = attribute.Value.Replace("<", "&lt;").Replace(">", "&gt;");
tag.SetAttribute(attribute.Name, val);
}
tag.SetAttribute(attribute.Name, attribute.Value);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/HtmlSanitizer/HtmlSanitizer.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
</ItemGroup>
<ItemGroup>
<Compile Include="EventArgs.cs" />
<Compile Include="HtmlFormatter.cs" />
<Compile Include="HtmlSanitizer.cs" />
<Compile Include="IHtmlSanitizer.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
Expand Down
15 changes: 14 additions & 1 deletion test/HtmlSanitizer.Tests/Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1808,7 +1808,7 @@ public void SanitizeEscapeAttrTest()
{
var sanitizer = new HtmlSanitizer();
var html = @"<div title=""&lt;foo&gt;""></div>";
Assert.Equal(@"<div title=""&amp;lt;foo&amp;gt;""></div>", sanitizer.Sanitize(html), ignoreCase: true);
Assert.Equal(@"<div title=""&lt;foo&gt;""></div>", sanitizer.Sanitize(html), ignoreCase: true);
}

[Fact]
Expand Down Expand Up @@ -2731,6 +2731,19 @@ public void NullStyleTest()

Assert.Equal("<p>xyz</p>", actual);
}

[Fact]
public void EscapeEntityInAttributeValueTest()
{
    // Regression test for https://github.com/mganss/HtmlSanitizer/issues/84:
    // raw angle brackets inside an attribute value are expected to come out entity-encoded.
    const string input = @"<input type=""text"" name=""my_name"" value=""<insert name>"">";
    const string expected = @"<input type=""text"" name=""my_name"" value=""&lt;insert name&gt;"">";

    var sanitizer = new HtmlSanitizer();
    sanitizer.HtmlParserFactory = () => new HtmlParser(new Configuration());

    var sanitized = sanitizer.Sanitize(input);

    Assert.Equal(expected, sanitized);
}
}
}

Expand Down

0 comments on commit 553b8dd

Please sign in to comment.