Skip to content

Commit

Permalink
Add basic validation error logging
Browse files Browse the repository at this point in the history
Based on the draft whatwg/url#502
  • Loading branch information
TRowbotham committed Aug 26, 2022
1 parent 04e172d commit 0ef0c6a
Show file tree
Hide file tree
Showing 29 changed files with 199 additions and 37 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
### Added

- Performance improvements
- Basic validation error logging

### Changed

- Forbid C0 control code points and U+007F DEL code point in non-opaque domain names per [whatwg/url#685](https://github.com/whatwg/url/pull/685)
Expand Down
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"ext-json": "*",
"ext-mbstring": "*",
"brick/math": "^0.8.13 || ^0.9",
"psr/log": "^3.0",
"rowbot/idna": "^0.1.5"
},
"require-dev": {
Expand Down
4 changes: 4 additions & 0 deletions phpstan.neon
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@ parameters:
path: src/String/Utf8String.php

# Note to self: You can't escape single quotes in neon. A possible alternative would be to replace single quotes with \x27.
-
message: '#Instanceof between Psr\\Log\\LoggerInterface and Psr\\Log\\LoggerInterface will always evaluate to true\.#'
path: src/URL.php
count: 1
25 changes: 23 additions & 2 deletions src/BasicURLParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,22 @@

namespace Rowbot\URL;

use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerAwareTrait;
use Psr\Log\LoggerInterface;
use Rowbot\URL\State\State;
use Rowbot\URL\String\StringBuffer;
use Rowbot\URL\String\USVStringInterface;

class BasicURLParser
class BasicURLParser implements LoggerAwareInterface
{
use LoggerAwareTrait;

/**
* Creates a parser, optionally wired to a PSR-3 logger that is notified of validation errors.
*
* @param \Psr\Log\LoggerInterface|null $logger Logger that receives the validation error messages emitted while
* parsing. When null, the nullsafe logger calls in this class are skipped and nothing is logged.
*/
public function __construct(?LoggerInterface $logger = null)
{
// NOTE(review): $logger is not declared in this class; presumably it is provided by LoggerAwareTrait — confirm.
$this->logger = $logger;
}

/**
* The parser can parse both absolute and relative URLs. If a relative URL is given, a base URL must also be given
* so that an absolute URL can be resolved. It can also parse individual parts of a URL when the default starting
Expand Down Expand Up @@ -50,21 +60,32 @@ public function parse(

if ($count !== 0) {
// Validation error.
$this->logger?->notice('unexpected-c0-control-or-space');
}
}

$input = $input->replaceRegex('/[\x09\x0A\x0D]+/u', '', -1, $count);

if ($count !== 0) {
// Validation error.
$this->logger?->notice('unexpected-ascii-tab-or-newline');
}

$iter = $input->getIterator();
$iter->rewind();
// length + imaginary eof character
$length = $input->length() + 1;
$buffer = new StringBuffer();
$context = new ParserContext($input, $iter, $buffer, $url, $base, $stateOverride, $encodingOverride);
$context = new ParserContext(
$input,
$iter,
$buffer,
$url,
$base,
$stateOverride,
$encodingOverride,
$this->logger
);

do {
$status = $context->state->handle($context, $iter->current());
Expand Down
34 changes: 26 additions & 8 deletions src/Component/Host/HostParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Rowbot\URL\Component\Host;

use Rowbot\Idna\Idna;
use Rowbot\URL\ParserContext;
use Rowbot\URL\String\CodePoint;
use Rowbot\URL\String\EncodeSet;
use Rowbot\URL\String\PercentEncodeTrait;
Expand Down Expand Up @@ -34,37 +35,46 @@ class HostParser
*
* @return \Rowbot\URL\Component\Host\HostInterface|false The returned Host can never be a null host.
*/
public function parse(USVStringInterface $input, bool $isNotSpecial = false): HostInterface|false
{
public function parse(
ParserContext $context,
USVStringInterface $input,
bool $isNotSpecial = false
): HostInterface|false {
if ($input->startsWith('[')) {
if (!$input->endsWith(']')) {
// Validation error.
$context->logger?->warning('unclosed-ipv6-address');

return false;
}

return IPv6AddressParser::parse($input->substr(1, -1));
return IPv6AddressParser::parse($context, $input->substr(1, -1));
}

if ($isNotSpecial) {
return $this->parseOpaqueHost($input);
return $this->parseOpaqueHost($context, $input);
}

assert(!$input->isEmpty());
$domain = rawurldecode((string) $input);
$asciiDomain = $this->domainToAscii($domain);
$asciiDomain = $this->domainToAscii($context, $domain);

if ($asciiDomain === false) {
// Validation error.
$context->logger?->warning('domain-to-ascii-failure');

return false;
}

if ($asciiDomain->matches('/[' . self::FORBIDDEN_DOMAIN_CODEPOINTS . ']/u')) {
// Validation error.
$context->logger?->warning('domain-forbidden-code-point');

return false;
}

if (IPv4AddressParser::endsInIPv4Number($asciiDomain)) {
return IPv4AddressParser::parse($asciiDomain);
return IPv4AddressParser::parse($context, $asciiDomain);
}

return $asciiDomain;
Expand All @@ -73,7 +83,7 @@ public function parse(USVStringInterface $input, bool $isNotSpecial = false): Ho
/**
* @see https://url.spec.whatwg.org/#concept-domain-to-ascii
*/
private function domainToAscii(string $domain, bool $beStrict = false): StringHost|false
private function domainToAscii(ParserContext $context, string $domain, bool $beStrict = false): StringHost|false
{
$result = Idna::toAscii($domain, [
'CheckHyphens' => false,
Expand All @@ -87,11 +97,15 @@ private function domainToAscii(string $domain, bool $beStrict = false): StringHo

if ($convertedDomain === '') {
// Validation error.
$context->logger?->warning('domain-to-ascii-empty-domain-failure');

return false;
}

if ($result->hasErrors()) {
// Validation error.
$context->logger?->warning('domain-to-ascii-failure');

return false;
}

Expand All @@ -103,20 +117,24 @@ private function domainToAscii(string $domain, bool $beStrict = false): StringHo
*
* @see https://url.spec.whatwg.org/#concept-opaque-host-parser
*/
private function parseOpaqueHost(USVStringInterface $input): HostInterface|false
private function parseOpaqueHost(ParserContext $context, USVStringInterface $input): HostInterface|false
{
if ($input->matches('/[' . self::FORBIDDEN_HOST_CODEPOINTS . ']/u')) {
// Validation error.
$context->logger?->warning('opaque-host-forbidden-code-point');

return false;
}

foreach ($input as $i => $codePoint) {
if (!CodePoint::isUrlCodePoint($codePoint) && $codePoint !== '%') {
// Validation error.
$context->logger?->notice('invalid-url-code-point');
}

if ($codePoint === '%' && !$input->substr($i + 1)->startsWithTwoAsciiHexDigits()) {
// Validation error.
$context->logger?->notice('unescaped-percent-sign');
}
}

Expand Down
12 changes: 11 additions & 1 deletion src/Component/Host/IPv4AddressParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Rowbot\URL\Component\Host;

use Rowbot\URL\Component\Host\Math\NumberFactory;
use Rowbot\URL\ParserContext;
use Rowbot\URL\String\CodePoint;
use Rowbot\URL\String\USVStringInterface;

Expand All @@ -18,7 +19,7 @@
*/
class IPv4AddressParser
{
public static function parse(USVStringInterface $input): IPv4Address|false
public static function parse(ParserContext $context, USVStringInterface $input): IPv4Address|false
{
// 1. Let validationError be false.
//
Expand All @@ -44,6 +45,8 @@ public static function parse(USVStringInterface $input): IPv4Address|false

// 4. If parts’s size is greater than 4, validation error, return failure.
if ($count > 4) {
$context->logger?->warning('ipv4-too-many-parts');

return false;
}

Expand All @@ -57,6 +60,8 @@ public static function parse(USVStringInterface $input): IPv4Address|false

// 6.2. If result is failure, validation error, return failure.
if ($result === false) {
$context->logger?->warning('ipv4-invalid-radix-digit');

return false;
}

Expand All @@ -75,11 +80,14 @@ public static function parse(USVStringInterface $input): IPv4Address|false
// And therefore error reporting resumes.
if ($validationError) {
// Validation error.
$context->logger?->notice('unexpected-non-decimal-number');
}

// 8. If any item in numbers is greater than 255, validation error.
foreach ($numbers as $number) {
if ($number->isGreaterThan(255)) {
$context->logger?->warning('ipv4-part-out-of-range');

break;
}
}
Expand All @@ -98,6 +106,8 @@ public static function parse(USVStringInterface $input): IPv4Address|false
// 10. If the last item in numbers is greater than or equal to 256 ** (5 − numbers’s size), validation error,
// return failure.
if ($numbers[$size - 1]->isGreaterThanOrEqualTo($limit)) {
$context->logger?->warning('ipv4-part-out-of-range');

return false;
}

Expand Down
32 changes: 30 additions & 2 deletions src/Component/Host/IPv6AddressParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace Rowbot\URL\Component\Host;

use Rowbot\URL\ParserContext;
use Rowbot\URL\String\CodePoint;
use Rowbot\URL\String\StringIteratorInterface;
use Rowbot\URL\String\USVStringInterface;
Expand All @@ -16,7 +17,7 @@
*/
class IPv6AddressParser
{
public static function parse(USVStringInterface $input): IPv6Address|false
public static function parse(ParserContext $context, USVStringInterface $input): IPv6Address|false
{
// 1. Let address be a new IPv6 address whose IPv6 pieces are all 0.
$address = [0, 0, 0, 0, 0, 0, 0, 0];
Expand All @@ -35,6 +36,8 @@ public static function parse(USVStringInterface $input): IPv6Address|false
if ($iter->current() === ':') {
// 5.1. If remaining does not start with U+003A (:), validation error, return failure.
if ($iter->peek() !== ':') {
$context->logger?->warning('invalid-compressed-ipv6-address');

return false;
}

Expand All @@ -49,13 +52,17 @@ public static function parse(USVStringInterface $input): IPv6Address|false
while ($iter->valid()) {
// 6.1. If pieceIndex is 8, validation error, return failure.
if ($pieceIndex === 8) {
$context->logger?->warning('ipv6-too-many-pieces');

return false;
}

// 6.2. If c is U+003A (:), then:
if ($iter->current() === ':') {
// 6.2.1. If compress is non-null, validation error, return failure.
if ($compress !== null) {
$context->logger?->warning('ipv6-multiple-compression');

return false;
}

Expand Down Expand Up @@ -84,6 +91,8 @@ public static function parse(USVStringInterface $input): IPv6Address|false
if ($iter->current() === '.') {
// 6.5.1. If length is 0, validation error, return failure.
if ($length === 0) {
$context->logger?->warning('ipv4-in-ipv6-empty-part');

return false;
}

Expand All @@ -92,10 +101,12 @@ public static function parse(USVStringInterface $input): IPv6Address|false

// 6.5.3. If pieceIndex is greater than 6, validation error, return failure.
if ($pieceIndex > 6) {
$context->logger?->warning('ipv4-in-ipv6-too-many-pieces');

return false;
}

$result = self::parseIPv4Address($iter, $address, $pieceIndex);
$result = self::parseIPv4Address($context, $iter, $address, $pieceIndex);

if ($result === false) {
return false;
Expand All @@ -114,11 +125,15 @@ public static function parse(USVStringInterface $input): IPv6Address|false

// 6.6.2. If c is the EOF code point, validation error, return failure.
if (!$iter->valid()) {
$context->logger?->warning('ipv6-unexpected-eof');

return false;
}

// 6.7. Otherwise, if c is not the EOF code point, validation error, return failure.
} elseif ($iter->valid()) {
$context->logger?->warning('ipv6-unexpected-delimiter');

return false;
}

Expand Down Expand Up @@ -147,6 +162,8 @@ public static function parse(USVStringInterface $input): IPv6Address|false

// Otherwise, if compress is null and pieceIndex is not 8, validation error, return failure.
} elseif ($pieceIndex !== 8) {
$context->logger?->warning('ipv6-too-few-pieces');

return false;
}

Expand All @@ -160,6 +177,7 @@ public static function parse(USVStringInterface $input): IPv6Address|false
* @return array{0: list<int>, 1: int}|false
*/
private static function parseIPv4Address(
ParserContext $context,
StringIteratorInterface $iter,
array $address,
int $pieceIndex
Expand All @@ -177,6 +195,8 @@ private static function parseIPv4Address(
// 6.5.5.2.2 Otherwise, validation error, return failure.
if ($iter->current() !== '.' || $numbersSeen >= 4) {
// Validation error.
$context->logger?->warning('ipv4-in-ipv6-too-many-parts');

return false;
}

Expand All @@ -189,6 +209,8 @@ private static function parseIPv4Address(
// 6.5.5.3. If c is not an ASCII digit, validation error, return failure.
if (strpbrk($current, CodePoint::ASCII_DIGIT_MASK) !== $current) {
// Validation error.
$context->logger?->warning('ipv4-in-ipv6-unexpected-code-point');

return false;
}

Expand All @@ -204,6 +226,8 @@ private static function parseIPv4Address(
// Otherwise, if ipv4Piece is 0, validation error, return failure.
} elseif ($ipv4Piece === 0) {
// Validation error.
$context->logger?->warning('ipv4-in-ipv6-invalid-first-part');

return false;

// Otherwise, set ipv4Piece to ipv4Piece × 10 + number.
Expand All @@ -214,6 +238,8 @@ private static function parseIPv4Address(
// 6.5.5.4.3. If ipv4Piece is greater than 255, validation error, return failure.
if ($ipv4Piece > 255) {
// Validation error.
$context->logger?->warning('ipv4-in-ipv6-part-out-of-range');

return false;
}

Expand All @@ -238,6 +264,8 @@ private static function parseIPv4Address(
// 6.5.6. If numbersSeen is not 4, validation error, return failure.
if ($numbersSeen !== 4) {
// Validation error.
$context->logger?->warning('ipv4-in-ipv6-too-few-parts');

return false;
}

Expand Down
Loading

0 comments on commit 0ef0c6a

Please sign in to comment.