Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix issue #242 Malformed URL validator for attribute 'href' in tag A #245

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Pass/StandardFixPass.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class StandardFixPass extends BasePass

protected $remove_attributes_for_codes = [
ValidationErrorCode::INVALID_URL_PROTOCOL,
ValidationErrorCode::INVALID_URL_HOST,
ValidationErrorCode::INVALID_URL,
ValidationErrorCode::INVALID_ATTR_VALUE,
ValidationErrorCode::DISALLOWED_ATTR,
Expand Down
5 changes: 5 additions & 0 deletions src/Spec/validator-generated.php
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ class ValidationErrorCode {
const INVALID_PROPERTY_VALUE_IN_ATTR_VALUE = 'INVALID_PROPERTY_VALUE_IN_ATTR_VALUE';
const MISSING_URL = 'MISSING_URL';
const INVALID_URL = 'INVALID_URL';
const INVALID_URL_HOST = 'INVALID_URL_HOST';
const INVALID_URL_PROTOCOL = 'INVALID_URL_PROTOCOL';
const DISALLOWED_RELATIVE_URL = 'DISALLOWED_RELATIVE_URL';
const DISALLOWED_PROPERTY_IN_ATTR_VALUE = 'DISALLOWED_PROPERTY_IN_ATTR_VALUE';
Expand Down Expand Up @@ -5270,6 +5271,10 @@ public static function createValidationRules() {
$o_1287->code = ValidationErrorCode::CSS_SYNTAX_INVALID_ATTR_SELECTOR;
$o_1287->format = 'CSS syntax error in tag \'%1\' - invalid attribute selector.';
$o_0->error_formats[] = $o_1287;
$o_1288 = new ErrorFormat();
$o_1288->code = ValidationErrorCode::INVALID_URL_HOST;
$o_1288->format = 'Invalid URL host \'%3\' for attribute \'%1\' in tag \'%2\'.';
$o_0->error_formats[] = $o_1288;
return $o_0;
}
}
Expand Down
11 changes: 10 additions & 1 deletion src/Validate/ParsedUrlSpec.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,15 @@ public function validateUrlAndProtocol($adapter, Context $context, $url, TagSpec
$adapter->disallowedRelativeUrl($context, $url, $tagspec, $validation_result, $line_delta);
return;
}

if (!empty($url_components['host'])) {
$host = $url_components['host'];
// valid host using rfc952 with ipv6 and ipv4 ips and support utf-8 hosts
if (preg_match("/^(((?:(?:[a-z0-9\\x{00a1}-\\x{ffff}](?:-)*)*(?:[a-z0-9\\x{00a1}-\\x{ffff}])+)(?:\\.(?:[a-z0-9\\x{00a1}-\\x{ffff}](?:-)*)*(?:[a-z0-9\\x{00a1}-\\x{ffff}])+)*(?:\\.(?:[a-z0-9\\x{00a1}-\\x{ffff}]){2,}))|\\[(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\]|(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))$/u", $host) !== 1) {
$adapter->invalidUrlHost($context, $host, $tagspec, $validation_result, $line_delta);
return;
}
}
}

}
}
13 changes: 13 additions & 0 deletions src/Validate/ParsedUrlSpecAttrErrorAdapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,19 @@ public function invalidUrl(Context $context, $url, TagSpec $tagspec, SValidation
[$this->attr_name, ParsedTagSpec::getTagSpecName($tagspec), $url], $tagspec->spec_url, $result, $this->attr_name);
}

/**
 * Records an INVALID_URL_HOST validation error for the attribute this adapter wraps.
 *
 * The error parameters are passed in the order: attribute name, tag spec name, host.
 *
 * @param Context $context context on which the error is added
 * @param string $uri_host host value reported as the third error parameter
 * @param TagSpec $tagspec tag spec supplying the tag spec name and the spec URL
 * @param SValidationResult $result validation result handed through to the context
 * @param int $line_delta accepted for signature parity; not read by this method
 */
public function invalidUrlHost(Context $context, $uri_host, TagSpec $tagspec, SValidationResult $result, $line_delta = 0)
{
    $attribute = $this->attr_name;
    $error_params = [$attribute, ParsedTagSpec::getTagSpecName($tagspec), $uri_host];

    $context->addError(
        ValidationErrorCode::INVALID_URL_HOST,
        $error_params,
        $tagspec->spec_url,
        $result,
        $attribute
    );
}

/**
* @param Context $context
* @param string $uri_scheme
Expand Down
1 change: 1 addition & 0 deletions src/Validate/RenderValidationResult.php
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ public function categorizeError(ValidationError $error)
if ((in_array($error->code, [ValidationErrorCode::MISSING_URL,
ValidationErrorCode::INVALID_URL,
ValidationErrorCode::INVALID_URL_PROTOCOL,
ValidationErrorCode::INVALID_URL_HOST,
ValidationErrorCode::DISALLOWED_RELATIVE_URL]))
) {
if (isset($error->params[1]) && strpos($error->params[1], 'amp-') === 0) {
Expand Down
2 changes: 2 additions & 0 deletions src/Validate/SValidationResult.php
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ public static function specificity($code)
return 101;
case ValidationErrorCode::DEPRECATED_TAG:
return 102;
case ValidationErrorCode::INVALID_URL_HOST:
return 103;
default:
throw new \Exception('Unknown error code');
}
Expand Down
4 changes: 2 additions & 2 deletions tests/test-data/full-html/urls.html
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
<a href="foo">Valid URL</a>
<a href="/bar">Valid URL</a>
<a href="#foobar">Valid URL</a>
<a href="https://⚡">Valid URL</a>
<a href="https://">Valid URL</a>
<a href="https://⚡">Invalid URL host</a>
<a href="https://">Malformed URL</a>
<a href="HtTpS://Google.com/">Valid URL</a>
<a href="javascript:alert('boo')">Invalid protocol</a>
<a href="JavaScript:alert('boo')">Invalid protocol</a>
Expand Down
13 changes: 9 additions & 4 deletions tests/test-data/full-html/urls.html.out
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
<a href="foo">Valid URL</a>
<a href="/bar">Valid URL</a>
<a href="#foobar">Valid URL</a>
<a href="https://⚡">Valid URL</a>
<a>Valid URL</a>
<a>Invalid URL host</a>
<a>Malformed URL</a>
<a href="HtTpS://Google.com/">Valid URL</a>
<a>Invalid protocol</a>
<a>Invalid protocol</a>
Expand Down Expand Up @@ -121,8 +121,8 @@ Line 32: <a href=" http://google.com/ ">Valid URL</a>
Line 33: <a href="foo">Valid URL</a>
Line 34: <a href="/bar">Valid URL</a>
Line 35: <a href="#foobar">Valid URL</a>
Line 36: <a href="https://⚡">Valid URL</a>
Line 37: <a href="https://">Valid URL</a>
Line 36: <a href="https://⚡">Invalid URL host</a>
Line 37: <a href="https://">Malformed URL</a>
Line 38: <a href="HtTpS://Google.com/">Valid URL</a>
Line 39: <a href="javascript:alert('boo')">Invalid protocol</a>
Line 40: <a href="JavaScript:alert('boo')">Invalid protocol</a>
Expand Down Expand Up @@ -185,6 +185,11 @@ FAIL
ACTION TAKEN: link.href attribute was removed due to validation issues.
- FINAL ACTION TAKEN: link tag removed from head as it still does not validate. Could not fix tag validation problems.

<a href="https://⚡"> on line 36
- Invalid URL host '⚡:' for attribute 'href' in tag 'a'.
[code: INVALID_URL_HOST category: DISALLOWED_HTML]
ACTION TAKEN: a.href attribute was removed due to validation issues.

<a href="https://"> on line 37
- Malformed URL 'https://' for attribute 'href' in tag 'a'.
[code: INVALID_URL category: DISALLOWED_HTML]
Expand Down