Skip to content

Commit

Permalink
Introduced the concept of standardized elements, which require less e…
Browse files Browse the repository at this point in the history
…ffort to produce.
  • Loading branch information
RyanLamansky committed Aug 3, 2024
1 parent 8151443 commit a8e8360
Show file tree
Hide file tree
Showing 11 changed files with 193 additions and 7 deletions.
4 changes: 3 additions & 1 deletion Examples/AspNetPageEmitter/Program.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using HtmlUtilities;
using HtmlUtilities.Validated;
using HtmlUtilities.Validated.Standardized;

var builder = WebApplication.CreateBuilder(args);

Expand All @@ -9,7 +10,7 @@

app.Use((context, next) =>
{
context.Response.GetTypedHeaders().CacheControl = new Microsoft.Net.Http.Headers.CacheControlHeaderValue()
context.Response.GetTypedHeaders().CacheControl = new()
{
Private = true,
NoCache = true,
Expand All @@ -26,6 +27,7 @@ class TestDocument : IHtmlDocument
ValidatedAttributeValue IHtmlDocument.Language => "en-us";
ValidatedText IHtmlDocument.Title => "Hello World!";
ValidatedAttributeValue IHtmlDocument.Description => "Test page for HTML Utilities";
IReadOnlyCollection<Style> IHtmlDocument.Styles { get; } = [new() { Content = string.Join('\n', File.ReadAllLines("styles.css")) }];

Task IHtmlDocument.WriteBodyContentsAsync(HtmlWriter writer, CancellationToken cancellationToken)
{
Expand Down
4 changes: 4 additions & 0 deletions Examples/AspNetPageEmitter/styles.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* Applies border-box sizing and a sans-serif font to every element and to the ::before/::after pseudo-elements. */
*, ::before, ::after {
box-sizing: border-box;
font-family: sans-serif;
}
27 changes: 27 additions & 0 deletions HtmlUtilities/AttributeWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,33 @@ public readonly struct AttributeWriter

internal AttributeWriter(IBufferWriter<byte> writer) => this.writer = writer;

/// <summary>
/// Emits a value-less attribute by copying its pre-encoded name straight to the output.
/// </summary>
/// <param name="nameWithLeadingSpace">The attribute name as bytes, beginning with the space that separates it from whatever precedes it.</param>
internal void Write(ReadOnlySpan<byte> nameWithLeadingSpace)
{
    // Debug-only sanity check: a separating space followed by at least one more byte.
    System.Diagnostics.Debug.Assert(nameWithLeadingSpace is [(byte)' ', _, ..]);

    writer.Write(nameWithLeadingSpace);
}

/// <summary>
/// Emits an attribute name followed by its value, or nothing at all when no value is supplied.
/// </summary>
/// <param name="nameWithLeadingSpace">The attribute name as bytes, beginning with the space that separates it from whatever precedes it.</param>
/// <param name="value">The value to emit after the name. When null, neither the name nor a value is written.</param>
internal void Write(ReadOnlySpan<byte> nameWithLeadingSpace, ValidatedAttributeValue? value)
{
    // Debug-only sanity check: a separating space followed by at least one more byte.
    System.Diagnostics.Debug.Assert(nameWithLeadingSpace is [(byte)' ', _, ..]);

    if (value is { } provided)
    {
        writer.Write(nameWithLeadingSpace);
        writer.Write(provided.value);
    }
}

/// <summary>
/// Writes a validated attribute name with no value.
/// </summary>
Expand Down
14 changes: 11 additions & 3 deletions HtmlUtilities/HtmlDocumentExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ public static Task WriteToAsync(this IHtmlDocument document, HttpContext context
response.ContentType = "text/html; charset=utf-8";
Span<char> cspNonceUtf16 = stackalloc char[32];
System.Security.Cryptography.RandomNumberGenerator.GetHexString(cspNonceUtf16, true);
response.Headers.ContentSecurityPolicy = $"base-uri {request.Scheme}://{request.Host}/;default-src 'none';script-src 'unsafe-inline' 'nonce-{cspNonceUtf16}'";
// unsafe-inline only applies to browsers that don't support nonce. Can be removed someday.
response.Headers.ContentSecurityPolicy = $"base-uri {request.Scheme}://{request.Host}/;default-src 'unsafe-inline' 'nonce-{cspNonceUtf16}'";
// unsafe-inline only applies to browsers that don't support nonce. Can be removed when security scanners stop asking for it.

var writer = context.Response.BodyWriter;

Expand Down Expand Up @@ -55,9 +55,17 @@ public static Task WriteToAsync(this IHtmlDocument document, HttpContext context
writer.Write(">"u8);
}

var htmlWriter = new HtmlWriter(writer, new Validated.ValidatedAttribute("nonce", cspNonceUtf16));

foreach (var link in document.Links ?? [])
link.Write(htmlWriter);

foreach (var style in document.Styles ?? [])
style.Write(htmlWriter);

writer.Write("</head><body>"u8);

return document.WriteBodyContentsAsync(new HtmlWriter(writer, new Validated.ValidatedAttribute("nonce", cspNonceUtf16)), context.RequestAborted);
return document.WriteBodyContentsAsync(htmlWriter, context.RequestAborted);

// HTML5 spec doesn't require </body></html>, so that simplifies things a bit here.
}
Expand Down
40 changes: 39 additions & 1 deletion HtmlUtilities/HtmlWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public sealed class HtmlWriter
"</html>"u8.ToArray());

private readonly IBufferWriter<byte> writer;
private readonly ValidatedAttribute cspNonce;
internal readonly ValidatedAttribute cspNonce;

internal HtmlWriter(IBufferWriter<byte> writer)
: this(writer, new ValidatedAttribute())
Expand Down Expand Up @@ -121,6 +121,32 @@ private static void WriteLessThan(IBufferWriter<byte> writer)
writer.Advance(1);
}

/// <summary>
/// Writes an element from a pre-encoded start tag: the start tag, optional attributes, optional child content, then the matching end tag.
/// </summary>
/// <param name="nameWithAngleBrackets">The complete start tag as bytes, such as "&lt;style&gt;". The end tag is derived from it.</param>
/// <param name="attributes">If provided, invoked with an <see cref="AttributeWriter"/> before the start tag's closing '&gt;' is written.</param>
/// <param name="children">If provided, invoked with this <see cref="HtmlWriter"/> to write the element's content.</param>
internal void WriteElement(ReadOnlySpan<byte> nameWithAngleBrackets, Action<AttributeWriter>? attributes = null, Action<HtmlWriter>? children = null)
{
    // Debug-only precondition: the input must be a whole tag, '<' + at least one name byte + '>'.
    // Without it, an empty input would index past the end-tag buffer below.
    System.Diagnostics.Debug.Assert(
        nameWithAngleBrackets.Length >= 3 &&
        nameWithAngleBrackets[0] == (byte)'<' &&
        nameWithAngleBrackets[^1] == (byte)'>');

    var writer = this.writer;

    // The end tag is the start tag with '/' inserted after '<', hence one extra byte.
    Span<byte> end = stackalloc byte[nameWithAngleBrackets.Length + 1];
    end[0] = (byte)'<';
    end[1] = (byte)'/';
    nameWithAngleBrackets[1..].CopyTo(end[2..]);

    if (attributes is null)
    {
        writer.Write(nameWithAngleBrackets);
    }
    else
    {
        // Hold back the closing '>' so the attributes land inside the start tag.
        writer.Write(nameWithAngleBrackets[..^1]);

        attributes(new AttributeWriter(writer));

        WriteGreaterThan(writer);
    }

    children?.Invoke(this);

    writer.Write(end);
}

/// <summary>
/// Writes a validated element with optional attributes and child content.
/// </summary>
Expand Down Expand Up @@ -255,6 +281,18 @@ public void WriteElementSelfClosing(ValidatedElement element, Action<AttributeWr
}
}

/// <summary>
/// Writes a pre-encoded start tag with optional attributes and no end tag.
/// </summary>
/// <param name="nameWithAngleBrackets">The complete start tag as bytes, such as "&lt;link&gt;".</param>
/// <param name="attributes">If provided, invoked with an <see cref="AttributeWriter"/> before the closing '&gt;' is written.</param>
internal void WriteElementSelfClosing(ReadOnlySpan<byte> nameWithAngleBrackets, Action<AttributeWriter>? attributes = null)
{
    System.Diagnostics.Debug.Assert(nameWithAngleBrackets[0] == (byte)'<' && nameWithAngleBrackets[^1] == (byte)'>');

    // Everything except the trailing '>' goes out first so attributes can follow the name.
    var tagWithoutClose = nameWithAngleBrackets[..^1];
    writer.Write(tagWithoutClose);

    attributes?.Invoke(new AttributeWriter(writer));

    WriteGreaterThan(writer);
}

/// <summary>
/// Writes an element without an end tag.
/// </summary>
Expand Down
13 changes: 13 additions & 0 deletions HtmlUtilities/IHtmlDocument.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using HtmlUtilities.Validated;
using HtmlUtilities.Validated.Standardized;

namespace HtmlUtilities;

Expand All @@ -25,6 +26,18 @@ public interface IHtmlDocument
/// </summary>
ValidatedAttributeValue Description => new();

/// <summary>
/// The document's "link" elements.
/// Empty by default; implementers override this to supply their own collection.
/// </summary>
IReadOnlyCollection<Link> Links => [];

/// <summary>
/// The document's "style" elements.
/// Empty by default.
/// </summary>
IReadOnlyCollection<Style> Styles => [];

/// <summary>
/// Writes the content of an HTML document's body.
/// </summary>
Expand Down
37 changes: 37 additions & 0 deletions HtmlUtilities/Validated/StandardElement.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
namespace HtmlUtilities.Validated;

/// <summary>
/// Base type for strongly-typed representations of standardized HTML elements.
/// Carries the global attributes shared by every such element.
/// </summary>
public abstract class StandardElement
{
    // Global attributes are listed at https://html.spec.whatwg.org/#global-attributes.

    /// <summary>
    /// The element's unique identifier.
    /// Source: https://dom.spec.whatwg.org/#concept-id
    /// </summary>
    public ValidatedAttributeValue? Id { get; set; }

    /// <summary>
    /// Advisory information for the element, such as would be appropriate for a tooltip.
    /// Source: https://html.spec.whatwg.org/#attr-title
    /// </summary>
    public ValidatedAttributeValue? Title { get; set; }

    // Construction is restricted to derived types within this assembly.
    private protected StandardElement() { }

    /// <summary>
    /// Writes the complete element to the provided <see cref="HtmlWriter"/>.
    /// </summary>
    /// <param name="writer">Receives the element.</param>
    internal abstract void Write(HtmlWriter writer);

    /// <summary>
    /// Emits the global attributes, "id" then "title", to the provided <see cref="AttributeWriter"/>.
    /// An attribute whose property is null is left out.
    /// </summary>
    /// <param name="writer">Receives the attributes.</param>
    private protected void Write(AttributeWriter writer)
    {
        var id = Id;
        var title = Title;

        writer.Write(" id"u8, id);
        writer.Write(" title"u8, title);
    }
}
28 changes: 28 additions & 0 deletions HtmlUtilities/Validated/Standardized/Link.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
namespace HtmlUtilities.Validated.Standardized;

/// <summary>
/// The HTML "link" element, from https://html.spec.whatwg.org/#the-link-element.
/// </summary>
public class Link : StandardElement
{
    /// <summary>
    /// Relationship between the document containing the hyperlink and the destination resource.
    /// When null, the "rel" attribute is omitted.
    /// </summary>
    public string? Rel { get; set; }

    /// <summary>
    /// Address of the hyperlink.
    /// When null, the "href" attribute is omitted.
    /// </summary>
    public string? Href { get; set; }

    /// <summary>
    /// Writes the "link" start tag with the global attributes followed by "rel" and "href".
    /// </summary>
    /// <param name="writer">Receives the element.</param>
    internal sealed override void Write(HtmlWriter writer)
    {
        writer.WriteElementSelfClosing("<link>"u8, attributes =>
        {
            Write(attributes);

            attributes.Write(" rel"u8, ToAttributeValue(Rel));
            attributes.Write(" href"u8, ToAttributeValue(Href));
        });
    }

    /// <summary>
    /// Converts an optional string into an optional attribute value, keeping null as null.
    /// </summary>
    /// <param name="text">The raw attribute text, or null for "no attribute".</param>
    /// <returns>Null when <paramref name="text"/> is null; otherwise the converted value.</returns>
    private static ValidatedAttributeValue? ToAttributeValue(string? text) =>
        // Passing a string? directly where ValidatedAttributeValue? is expected runs the string
        // conversion even for null, so the result always has a value and the attribute writer
        // no longer skips the attribute. Mapping null explicitly preserves "null means omitted".
        // NOTE(review): relies on the existing string-to-ValidatedAttributeValue conversion; confirm its null handling.
        text is null ? null : (ValidatedAttributeValue?)text;
}
28 changes: 28 additions & 0 deletions HtmlUtilities/Validated/Standardized/Style.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
namespace HtmlUtilities.Validated.Standardized;

/// <summary>
/// The HTML "style" element.
/// Source: https://html.spec.whatwg.org/#the-style-element
/// </summary>
public class Style : StandardElement
{
    /// <summary>
    /// The style sheet text placed inside the element.
    /// </summary>
    // NOTE(review): ValidatedText presumably applies HTML text escaping; confirm that is appropriate for style content.
    public ValidatedText Content { get; set; }

    /// <summary>
    /// The media the style sheet applies to. When null, the "media" attribute is omitted.
    /// </summary>
    public ValidatedAttributeValue? Media { get; set; }

    /// <summary>
    /// Writes the "style" element: global attributes, the writer's CSP nonce attribute, "media", then the content.
    /// </summary>
    /// <param name="writer">Receives the element and supplies the nonce attribute.</param>
    internal sealed override void Write(HtmlWriter writer)
    {
        writer.WriteElement("<style>"u8, WriteAttributes, WriteContent);

        void WriteAttributes(AttributeWriter attributes)
        {
            Write(attributes);
            attributes.Write(writer.cspNonce);

            attributes.Write(" media"u8, Media);
        }

        void WriteContent(HtmlWriter children) => children.WriteText(Content);
    }
}
2 changes: 1 addition & 1 deletion HtmlUtilities/Validated/ValidatedElement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public ValidatedElement(ValidatedElementName name, params ValidatedAttribute[]?
}

// Internal fast path for known-safe tag pairs.
internal ValidatedElement(byte[] start, byte[] end)
internal ValidatedElement(ReadOnlyMemory<byte> start, ReadOnlyMemory<byte> end)
{
this.start = start;
this.end = end;
Expand Down
3 changes: 2 additions & 1 deletion HtmlUtilities/Validated/ValidatedText.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ internal static void Validate(ReadOnlySpan<char> text, ref ArrayBuilder<byte> wr
foreach (var codePoint in CodePoint.GetEnumerable(text))
{
var categories = codePoint.InfraCategories;
if ((categories & CodePointInfraCategory.AsciiWhitespace) == 0 && (categories & (CodePointInfraCategory.Surrogate | CodePointInfraCategory.Control)) != 0)
if ((categories & CodePointInfraCategory.AsciiWhitespace) == 0 &&
(categories & (CodePointInfraCategory.Surrogate | CodePointInfraCategory.Control | CodePointInfraCategory.NonCharacter)) != 0)
continue;

switch (codePoint.Value)
Expand Down

0 comments on commit a8e8360

Please sign in to comment.