diff --git a/common.props b/common.props index 8fda700..2f7940d 100644 --- a/common.props +++ b/common.props @@ -1,6 +1,6 @@ - 1.0.1 + 1.0.2 $(NoWarn);CS1591 https://raw.githubusercontent.com/canerpatir/AntiSamy.NET/master/icon.png https://github.com/canerpatir/AntiSamy.NET diff --git a/src/AntiSamy/AntiSamy.csproj b/src/AntiSamy/AntiSamy.csproj index efb84ca..e7f5f48 100644 --- a/src/AntiSamy/AntiSamy.csproj +++ b/src/AntiSamy/AntiSamy.csproj @@ -4,7 +4,7 @@ netstandard2.0 true Caner Patır - 1.0.1 + 1.0.2 1.0.1.0 diff --git a/src/AntiSamy/AntiSamyDomScanner.cs b/src/AntiSamy/AntiSamyDomScanner.cs index 9f8b3be..f11e6bf 100644 --- a/src/AntiSamy/AntiSamyDomScanner.cs +++ b/src/AntiSamy/AntiSamyDomScanner.cs @@ -16,8 +16,6 @@ public sealed class AntiSamyDomScanner private readonly Policy _policy; - private int _num; - public AntiSamyDomScanner(Policy policy) => _policy = policy; public AntiySamyResult Scan(string html) @@ -27,18 +25,10 @@ public AntiySamyResult Scan(string html) throw new ArgumentNullException(nameof(html)); } - //had problems with the   getting double encoded, so this converts it to a literal space. - //this may need to be changed. html = html.Replace(" ", char.Parse("\u00a0").ToString()); - - //We have to replace any invalid XML characters - html = StripNonValidXmlCharacters(html); - - //holds the maximum input size for the incoming fragment int maxInputSize = Policy.DefaultMaxInputSize; - //grab the size specified in the config file try { maxInputSize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue); @@ -48,64 +38,53 @@ public AntiySamyResult Scan(string html) Console.WriteLine("Format Exception: " + fe); } - //ensure our input is less than the max if (maxInputSize < html.Length) { throw new ScanException("File size [" + html.Length + "] is larger than maximum [" + maxInputSize + "]"); } - //grab start time (to be put in the result set along with end time) DateTime start = DateTime.Now; - //fixes some weirdness in HTML agility if (!HtmlNode.ElementsFlags.ContainsKey("iframe")) { HtmlNode.ElementsFlags.Add("iframe", HtmlElementFlag.Empty); } HtmlNode.ElementsFlags.Remove("form"); - //Let's parse the incoming HTML var doc = new HtmlDocument(); - doc.LoadHtml(html); + doc.LoadHtml(html.Replace(Environment.NewLine, string.Empty).Replace("\t", string.Empty)); - //add closing tags doc.OptionAutoCloseOnEnd = true; - - //enforces XML rules, encodes big 5 doc.OptionOutputAsXml = true; - //loop through every node now, and enforce the rules held in the policy object - for (var i = 0; i < doc.DocumentNode.ChildNodes.Count; i++) - { - //grab current node - HtmlNode tmp = doc.DocumentNode.ChildNodes[i]; + EvaluateNodeCollection(doc.DocumentNode.ChildNodes); + + string finalCleanHtml = doc.DocumentNode.InnerHtml; - //this node can hold other nodes, so recursively validate - RecursiveValidateTag(tmp); + return new AntiySamyResult(start, finalCleanHtml, _errorMessages); + } - if (tmp.ParentNode == null) + private void EvaluateNodeCollection(HtmlNodeCollection nodes) + { + for (var i = 0; i < nodes.Count; i++) + { + HtmlNode node = nodes[i]; + EvaluateNode(node); + + if (node.ParentNode == null) { i--; } } - - string finalCleanHtml = doc.DocumentNode.InnerHtml; - - return new AntiySamyResult(start, finalCleanHtml, _errorMessages); } - private void RecursiveValidateTag(HtmlNode node) + private void EvaluateNode(HtmlNode node) { int maxinputsize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue); - _num++; - HtmlNode parentNode = node.ParentNode; - HtmlNode tmp = null; string tagName = node.Name; - //check this out - //might not be robust enough if (tagName.ToLower().Equals("#text")) // || tagName.ToLower().Equals("#comment")) { return; @@ -122,7 +101,7 @@ private void RecursiveValidateTag(HtmlNode node) } else { - errBuff.Append("The " + HtmlEntityEncoder.HtmlEntityEncode(tagName.ToLower()) + " "); + errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName.ToLower()) + "\" "); } errBuff.Append("tag has been filtered for security reasons. The contents of the tag will "); @@ -130,16 +109,8 @@ private void RecursiveValidateTag(HtmlNode node) _errorMessages.Add(errBuff.ToString()); - for (var i = 0; i < node.ChildNodes.Count; i++) - { - tmp = node.ChildNodes[i]; - RecursiveValidateTag(tmp); - - if (tmp.ParentNode == null) - { - i--; - } - } + EvaluateNodeCollection(node.ChildNodes); + PromoteChildren(node); } else if (Consts.TagActions.VALIDATE.Equals(tag.Action)) @@ -162,8 +133,8 @@ private void RecursiveValidateTag(HtmlNode node) { var errBuff = new StringBuilder(); - errBuff.Append("The " + HtmlEntityEncoder.HtmlEntityEncode(name)); - errBuff.Append(" attribute of the " + HtmlEntityEncoder.HtmlEntityEncode(tagName) + " tag has been removed for security reasons. "); + errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(name)); + errBuff.Append("\" attribute of the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons. "); errBuff.Append("This removal should not affect the display of the HTML submitted."); _errorMessages.Add(errBuff.ToString()); @@ -215,8 +186,8 @@ private void RecursiveValidateTag(HtmlNode node) string onInvalidAction = allowwdAttr.OnInvalid; var errBuff = new StringBuilder(); - errBuff.Append("The " + HtmlEntityEncoder.HtmlEntityEncode(tagName) + " tag contained an attribute that we couldn't process. "); - errBuff.Append("The " + HtmlEntityEncoder.HtmlEntityEncode(name) + " attribute had a value of " + HtmlEntityEncoder.HtmlEntityEncode(value) + ". "); + errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag contained an attribute that we couldn't process. "); + errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(name) + "\" attribute had a value of " + HtmlEntityEncoder.HtmlEntityEncode(value) + ". "); errBuff.Append("This value could not be accepted for security reasons. We have chosen to "); //Console.WriteLine(policy); @@ -224,29 +195,22 @@ private void RecursiveValidateTag(HtmlNode node) if (Consts.OnInvalidActions.REMOVE_TAG.Equals(onInvalidAction)) { parentNode.RemoveChild(node); - errBuff.Append("remove the " + HtmlEntityEncoder.HtmlEntityEncode(tagName) + " tag and its contents in order to process this input. "); + errBuff.Append("remove the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag and its contents in order to process this input. "); } else if (Consts.OnInvalidActions.FILTER_TAG.Equals(onInvalidAction)) { - for (var i = 0; i < node.ChildNodes.Count; i++) - { - tmp = node.ChildNodes[i]; - RecursiveValidateTag(tmp); - if (tmp.ParentNode == null) - { - i--; - } - } + + EvaluateNodeCollection(node.ChildNodes); PromoteChildren(node); - errBuff.Append("filter the " + HtmlEntityEncoder.HtmlEntityEncode(tagName) + " tag and leave its contents in place so that we could process this input."); + errBuff.Append("filter the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag and leave its contents in place so that we could process this input."); } else { node.Attributes.Remove(allowwdAttr.Name); currentAttributeIndex--; - errBuff.Append("remove the " + HtmlEntityEncoder.HtmlEntityEncode(name) + " attribute from the tag and leave everything else in place so that we could process this input."); + errBuff.Append("remove the \"" + HtmlEntityEncoder.HtmlEntityEncode(name) + "\" attribute from the tag and leave everything else in place so that we could process this input."); } _errorMessages.Add(errBuff.ToString()); @@ -260,15 +224,7 @@ private void RecursiveValidateTag(HtmlNode node) } } - for (var i = 0; i < node.ChildNodes.Count; i++) - { - tmp = node.ChildNodes[i]; - RecursiveValidateTag(tmp); - if (tmp.ParentNode == null) - { - i--; - } - } + EvaluateNodeCollection(node.ChildNodes); } else if ("truncate".Equals(tag.Action))// || Consts.TagActions.REMOVE.Equals(tag.Action)) { @@ -279,8 +235,8 @@ private void RecursiveValidateTag(HtmlNode node) { var errBuff = new StringBuilder(); - errBuff.Append("The " + HtmlEntityEncoder.HtmlEntityEncode(nnmap[0].Name)); - errBuff.Append(" attribute of the " + HtmlEntityEncoder.HtmlEntityEncode(tagName) + " tag has been removed for security reasons. "); + errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(nnmap[0].Name)); + errBuff.Append("\" attribute of the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons. "); errBuff.Append("This removal should not affect the display of the HTML submitted."); node.Attributes.Remove(nnmap[0].Name); _errorMessages.Add(errBuff.ToString()); @@ -308,7 +264,7 @@ private void RecursiveValidateTag(HtmlNode node) } else { - _errorMessages.Add("The " + HtmlEntityEncoder.HtmlEntityEncode(tagName) + " tag has been removed for security reasons."); + _errorMessages.Add("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons."); parentNode.RemoveChild(node); } } diff --git a/src/AntiSamy/Policy.cs b/src/AntiSamy/Policy.cs index e177c8e..cddc021 100644 --- a/src/AntiSamy/Policy.cs +++ b/src/AntiSamy/Policy.cs @@ -89,6 +89,8 @@ public string GetRegularExpression(string name) public DocumentAttribute GetGlobalAttribute(string name) => GlobalTagAttributes.TryGetValue(name, out DocumentAttribute val) ? val : null; + public DocumentAttribute GetCommonAttribute(string name) => CommonAttributes.TryGetValue(name, out DocumentAttribute val) ? val : null; + public DocumentTag GetTag(string tagName) => TagRules.TryGetValue(tagName, out DocumentTag value) ? value : null; public CssProperty GetCssProperty(string propertyName) => CssRules.TryGetValue(propertyName, out CssProperty value) ? value : null; @@ -131,7 +133,7 @@ private Dictionary ParseGlobalAttributes(XmlNode glob { string name = node.Attributes[0].Value; - DocumentAttribute toAdd = CommonAttributes[name]; + DocumentAttribute toAdd = GetCommonAttribute(name); if (toAdd != null) { globalAttributes.Add(name, toAdd); @@ -249,24 +251,24 @@ private Dictionary ParseTagRules(XmlNode tagAttributeListNo XmlNodeList attributeList = tagNode.SelectNodes("attribute"); foreach (XmlNode attributeNode in attributeList) { - if (!attributeNode.HasChildNodes) + if (IsCommonAttributeRule(attributeNode)) { - CommonAttributes.TryGetValue(attributeNode.Attributes["name"].Value, out DocumentAttribute attribute); + DocumentAttribute commonAttribute = GetCommonAttribute(attributeNode.Attributes["name"].Value); - if (attribute != null) + if (commonAttribute != null) { string onInvalid = attributeNode.Attributes["onInvalid"]?.Value; string description = attributeNode.Attributes["description"]?.Value; if (!string.IsNullOrEmpty(onInvalid)) { - attribute.OnInvalid = onInvalid; + commonAttribute.OnInvalid = onInvalid; } if (!string.IsNullOrEmpty(description)) { - attribute.Description = description; + commonAttribute.Description = description; } - tag.AddAllowedAttribute((DocumentAttribute)attribute.Clone()); + tag.AddAllowedAttribute((DocumentAttribute)commonAttribute.Clone()); } } else @@ -337,6 +339,11 @@ private Dictionary ParseTagRules(XmlNode tagAttributeListNo return tags; } + private static bool IsCommonAttributeRule(XmlNode attributeNode) + { + return !attributeNode.HasChildNodes; + } + private Dictionary ParseCssRules(XmlNode cssNodeList) { var properties = new Dictionary(StringComparer.InvariantCultureIgnoreCase); diff --git a/test/AntiSamy.Tests/AntiSamy.Tests.csproj b/test/AntiSamy.Tests/AntiSamy.Tests.csproj index 6661db5..1a468f3 100644 --- a/test/AntiSamy.Tests/AntiSamy.Tests.csproj +++ b/test/AntiSamy.Tests/AntiSamy.Tests.csproj @@ -44,6 +44,9 @@ Always + + Always + diff --git a/test/AntiSamy.Tests/TestBase.cs b/test/AntiSamy.Tests/TestBase.cs index d857540..66d776e 100644 --- a/test/AntiSamy.Tests/TestBase.cs +++ b/test/AntiSamy.Tests/TestBase.cs @@ -4,9 +4,10 @@ namespace AntiSamy.Tests { public abstract class TestBase { - private const string DefaultAntiSamyFile = "antisamy.xml"; protected readonly Policy TestPolicy; + protected virtual string DefaultAntiSamyFile => "antisamy.xml"; + protected TestBase() { TestPolicy = GetPolicy(DefaultAntiSamyFile); diff --git a/test/AntiSamy.Tests/UseCaseTests.cs b/test/AntiSamy.Tests/UseCaseTests.cs new file mode 100644 index 0000000..96f0851 --- /dev/null +++ b/test/AntiSamy.Tests/UseCaseTests.cs @@ -0,0 +1,132 @@ +using FluentAssertions; + +using Xunit; + +namespace AntiSamy.Tests +{ + public class UseCaseTests : TestBase + { + protected override string DefaultAntiSamyFile => "antisamy1.xml"; + + [Fact] + public void invalid_img_urls_should_be_filtered() + { + var scanner = new AntiSamy(); + + /* + * remove non-allowed image srcs + */ + + var input = @"
+ + Some description + +
"; + + AntiySamyResult result = scanner.Scan(input, TestPolicy); + + // safe - allowed url pattern in the antisamy1.xml + result.CleanHtml.Should().Contain("Some description"); + result.CleanHtml.Should().Contain(" + Some description +