Skip to content

Commit

Permalink
cumulative improvements and additional tests
Browse files Browse the repository at this point in the history
  • Loading branch information
CanerPatir committed May 7, 2018
1 parent 18a7897 commit a84dffa
Show file tree
Hide file tree
Showing 8 changed files with 2,566 additions and 85 deletions.
2 changes: 1 addition & 1 deletion common.props
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project>
<PropertyGroup>
<VersionPrefix>1.0.1</VersionPrefix>
<VersionPrefix>1.0.2</VersionPrefix>
<NoWarn>$(NoWarn);CS1591</NoWarn>
<PackageIconUrl>https://raw.githubusercontent.com/canerpatir/AntiSamy.NET/master/icon.png</PackageIconUrl>
<PackageProjectUrl>https://github.com/canerpatir/AntiSamy.NET</PackageProjectUrl>
Expand Down
2 changes: 1 addition & 1 deletion src/AntiSamy/AntiSamy.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<TargetFramework>netstandard2.0</TargetFramework>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<Authors>Caner Patır</Authors>
<Version>1.0.1</Version>
<Version>1.0.2</Version>
<FileVersion>1.0.1.0</FileVersion>
</PropertyGroup>

Expand Down
106 changes: 31 additions & 75 deletions src/AntiSamy/AntiSamyDomScanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ public sealed class AntiSamyDomScanner

private readonly Policy _policy;

private int _num;

public AntiSamyDomScanner(Policy policy) => _policy = policy;

public AntiySamyResult Scan(string html)
Expand All @@ -27,18 +25,10 @@ public AntiySamyResult Scan(string html)
throw new ArgumentNullException(nameof(html));
}

//had problems with the &nbsp; getting double encoded, so this converts it to a literal space.
//this may need to be changed.
html = html.Replace("&nbsp;", char.Parse("\u00a0").ToString());

//We have to replace any invalid XML characters

html = StripNonValidXmlCharacters(html);

//holds the maximum input size for the incoming fragment
int maxInputSize = Policy.DefaultMaxInputSize;

//grab the size specified in the config file
try
{
maxInputSize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue);
Expand All @@ -48,64 +38,53 @@ public AntiySamyResult Scan(string html)
Console.WriteLine("Format Exception: " + fe);
}

//ensure our input is less than the max
if (maxInputSize < html.Length)
{
throw new ScanException("File size [" + html.Length + "] is larger than maximum [" + maxInputSize + "]");
}

//grab start time (to be put in the result set along with end time)
DateTime start = DateTime.Now;

//fixes some weirdness in HTML agility
if (!HtmlNode.ElementsFlags.ContainsKey("iframe"))
{
HtmlNode.ElementsFlags.Add("iframe", HtmlElementFlag.Empty);
}
HtmlNode.ElementsFlags.Remove("form");

//Let's parse the incoming HTML
var doc = new HtmlDocument();
doc.LoadHtml(html);
doc.LoadHtml(html.Replace(Environment.NewLine, string.Empty).Replace("\t", string.Empty));

//add closing tags
doc.OptionAutoCloseOnEnd = true;

//enforces XML rules, encodes big 5
doc.OptionOutputAsXml = true;

//loop through every node now, and enforce the rules held in the policy object
for (var i = 0; i < doc.DocumentNode.ChildNodes.Count; i++)
{
//grab current node
HtmlNode tmp = doc.DocumentNode.ChildNodes[i];
EvaluateNodeCollection(doc.DocumentNode.ChildNodes);

string finalCleanHtml = doc.DocumentNode.InnerHtml;

//this node can hold other nodes, so recursively validate
RecursiveValidateTag(tmp);
return new AntiySamyResult(start, finalCleanHtml, _errorMessages);
}

if (tmp.ParentNode == null)
private void EvaluateNodeCollection(HtmlNodeCollection nodes)
{
for (var i = 0; i < nodes.Count; i++)
{
HtmlNode node = nodes[i];
EvaluateNode(node);

if (node.ParentNode == null)
{
i--;
}
}

string finalCleanHtml = doc.DocumentNode.InnerHtml;

return new AntiySamyResult(start, finalCleanHtml, _errorMessages);
}

private void RecursiveValidateTag(HtmlNode node)
private void EvaluateNode(HtmlNode node)
{
int maxinputsize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue);

_num++;

HtmlNode parentNode = node.ParentNode;
HtmlNode tmp = null;
string tagName = node.Name;

//check this out
//might not be robust enough
if (tagName.ToLower().Equals("#text")) // || tagName.ToLower().Equals("#comment"))
{
return;
Expand All @@ -122,24 +101,16 @@ private void RecursiveValidateTag(HtmlNode node)
}
else
{
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName.ToLower()) + "</b> ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName.ToLower()) + "\" ");
}

errBuff.Append("tag has been filtered for security reasons. The contents of the tag will ");
errBuff.Append("remain in place.");

_errorMessages.Add(errBuff.ToString());

for (var i = 0; i < node.ChildNodes.Count; i++)
{
tmp = node.ChildNodes[i];
RecursiveValidateTag(tmp);

if (tmp.ParentNode == null)
{
i--;
}
}
EvaluateNodeCollection(node.ChildNodes);

PromoteChildren(node);
}
else if (Consts.TagActions.VALIDATE.Equals(tag.Action))
Expand All @@ -162,8 +133,8 @@ private void RecursiveValidateTag(HtmlNode node)
{
var errBuff = new StringBuilder();

errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(name));
errBuff.Append("</b> attribute of the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(name));
errBuff.Append("\" attribute of the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons. ");
errBuff.Append("This removal should not affect the display of the HTML submitted.");

_errorMessages.Add(errBuff.ToString());
Expand Down Expand Up @@ -215,38 +186,31 @@ private void RecursiveValidateTag(HtmlNode node)
string onInvalidAction = allowwdAttr.OnInvalid;
var errBuff = new StringBuilder();

errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag contained an attribute that we couldn't process. ");
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(name) + "</b> attribute had a value of <u>" + HtmlEntityEncoder.HtmlEntityEncode(value) + "</u>. ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag contained an attribute that we couldn't process. ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(name) + "\" attribute had a value of <u>" + HtmlEntityEncoder.HtmlEntityEncode(value) + "</u>. ");
errBuff.Append("This value could not be accepted for security reasons. We have chosen to ");

//Console.WriteLine(policy);

if (Consts.OnInvalidActions.REMOVE_TAG.Equals(onInvalidAction))
{
parentNode.RemoveChild(node);
errBuff.Append("remove the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag and its contents in order to process this input. ");
errBuff.Append("remove the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag and its contents in order to process this input. ");
}
else if (Consts.OnInvalidActions.FILTER_TAG.Equals(onInvalidAction))
{
for (var i = 0; i < node.ChildNodes.Count; i++)
{
tmp = node.ChildNodes[i];
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}

EvaluateNodeCollection(node.ChildNodes);

PromoteChildren(node);

errBuff.Append("filter the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag and leave its contents in place so that we could process this input.");
errBuff.Append("filter the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag and leave its contents in place so that we could process this input.");
}
else
{
node.Attributes.Remove(allowwdAttr.Name);
currentAttributeIndex--;
errBuff.Append("remove the <b>" + HtmlEntityEncoder.HtmlEntityEncode(name) + "</b> attribute from the tag and leave everything else in place so that we could process this input.");
errBuff.Append("remove the \"" + HtmlEntityEncoder.HtmlEntityEncode(name) + "\" attribute from the tag and leave everything else in place so that we could process this input.");
}

_errorMessages.Add(errBuff.ToString());
Expand All @@ -260,15 +224,7 @@ private void RecursiveValidateTag(HtmlNode node)
}
}

for (var i = 0; i < node.ChildNodes.Count; i++)
{
tmp = node.ChildNodes[i];
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}
EvaluateNodeCollection(node.ChildNodes);
}
else if ("truncate".Equals(tag.Action))// || Consts.TagActions.REMOVE.Equals(tag.Action))
{
Expand All @@ -279,8 +235,8 @@ private void RecursiveValidateTag(HtmlNode node)
{
var errBuff = new StringBuilder();

errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(nnmap[0].Name));
errBuff.Append("</b> attribute of the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(nnmap[0].Name));
errBuff.Append("\" attribute of the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons. ");
errBuff.Append("This removal should not affect the display of the HTML submitted.");
node.Attributes.Remove(nnmap[0].Name);
_errorMessages.Add(errBuff.ToString());
Expand Down Expand Up @@ -308,7 +264,7 @@ private void RecursiveValidateTag(HtmlNode node)
}
else
{
_errorMessages.Add("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag has been removed for security reasons.");
_errorMessages.Add("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons.");
parentNode.RemoveChild(node);
}
}
Expand Down
21 changes: 14 additions & 7 deletions src/AntiSamy/Policy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ public string GetRegularExpression(string name)

/// <summary>
/// Looks up <paramref name="name"/> in <c>GlobalTagAttributes</c>.
/// </summary>
/// <param name="name">Key of the global attribute to fetch.</param>
/// <returns>The stored <see cref="DocumentAttribute"/>, or <c>null</c> when the key is absent.</returns>
public DocumentAttribute GetGlobalAttribute(string name)
    => GlobalTagAttributes.TryGetValue(name, out var attribute) ? attribute : null;

/// <summary>
/// Looks up <paramref name="name"/> in <c>CommonAttributes</c>.
/// </summary>
/// <param name="name">Key of the common attribute to fetch.</param>
/// <returns>The stored <see cref="DocumentAttribute"/>, or <c>null</c> when the key is absent.</returns>
public DocumentAttribute GetCommonAttribute(string name)
    => CommonAttributes.TryGetValue(name, out var attribute) ? attribute : null;

/// <summary>
/// Looks up <paramref name="tagName"/> in <c>TagRules</c>.
/// </summary>
/// <param name="tagName">Key of the tag rule to fetch.</param>
/// <returns>The stored <see cref="DocumentTag"/>, or <c>null</c> when the key is absent.</returns>
public DocumentTag GetTag(string tagName)
    => TagRules.TryGetValue(tagName, out var tag) ? tag : null;

/// <summary>
/// Looks up <paramref name="propertyName"/> in <c>CssRules</c>.
/// </summary>
/// <param name="propertyName">Key of the CSS property rule to fetch.</param>
/// <returns>The stored <see cref="CssProperty"/>, or <c>null</c> when the key is absent.</returns>
public CssProperty GetCssProperty(string propertyName)
    => CssRules.TryGetValue(propertyName, out var property) ? property : null;
Expand Down Expand Up @@ -131,7 +133,7 @@ private Dictionary<string, DocumentAttribute> ParseGlobalAttributes(XmlNode glob
{
string name = node.Attributes[0].Value;

DocumentAttribute toAdd = CommonAttributes[name];
DocumentAttribute toAdd = GetCommonAttribute(name);
if (toAdd != null)
{
globalAttributes.Add(name, toAdd);
Expand Down Expand Up @@ -249,24 +251,24 @@ private Dictionary<string, DocumentTag> ParseTagRules(XmlNode tagAttributeListNo
XmlNodeList attributeList = tagNode.SelectNodes("attribute");
foreach (XmlNode attributeNode in attributeList)
{
if (!attributeNode.HasChildNodes)
if (IsCommonAttributeRule(attributeNode))
{
CommonAttributes.TryGetValue(attributeNode.Attributes["name"].Value, out DocumentAttribute attribute);
DocumentAttribute commonAttribute = GetCommonAttribute(attributeNode.Attributes["name"].Value);

if (attribute != null)
if (commonAttribute != null)
{
string onInvalid = attributeNode.Attributes["onInvalid"]?.Value;
string description = attributeNode.Attributes["description"]?.Value;
if (!string.IsNullOrEmpty(onInvalid))
{
attribute.OnInvalid = onInvalid;
commonAttribute.OnInvalid = onInvalid;
}
if (!string.IsNullOrEmpty(description))
{
attribute.Description = description;
commonAttribute.Description = description;
}

tag.AddAllowedAttribute((DocumentAttribute)attribute.Clone());
tag.AddAllowedAttribute((DocumentAttribute)commonAttribute.Clone());
}
}
else
Expand Down Expand Up @@ -337,6 +339,11 @@ private Dictionary<string, DocumentTag> ParseTagRules(XmlNode tagAttributeListNo
return tags;
}

/// <summary>
/// Tells whether an <c>attribute</c> node carries no child nodes.
/// The tag-rule parser resolves such a node through <c>GetCommonAttribute</c>
/// instead of reading an inline definition from it.
/// </summary>
/// <param name="attributeNode">The attribute node to inspect.</param>
/// <returns><c>true</c> when <paramref name="attributeNode"/> has no child nodes; otherwise <c>false</c>.</returns>
private static bool IsCommonAttributeRule(XmlNode attributeNode)
    => !attributeNode.HasChildNodes;

private Dictionary<string, CssProperty> ParseCssRules(XmlNode cssNodeList)
{
var properties = new Dictionary<string, CssProperty>(StringComparer.InvariantCultureIgnoreCase);
Expand Down
3 changes: 3 additions & 0 deletions test/AntiSamy.Tests/AntiSamy.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@
<None Update="resources\antisamy.xsd">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="resources\antisamy1.xml">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

<ItemGroup>
Expand Down
3 changes: 2 additions & 1 deletion test/AntiSamy.Tests/TestBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ namespace AntiSamy.Tests
{
public abstract class TestBase
{
private const string DefaultAntiSamyFile = "antisamy.xml";
protected readonly Policy TestPolicy;

protected virtual string DefaultAntiSamyFile => "antisamy.xml";

protected TestBase()
{
TestPolicy = GetPolicy(DefaultAntiSamyFile);
Expand Down
Loading

0 comments on commit a84dffa

Please sign in to comment.