From 79f35e832b28e405f5b5a42ccf838b222694bd19 Mon Sep 17 00:00:00 2001 From: Yanick Witschi Date: Thu, 9 Nov 2023 16:43:12 +0100 Subject: [PATCH] Updated to PHP 8 features using Rector <3 --- src/BaseUriCollection.php | 2 +- src/CrawlUri.php | 50 ++++-------- src/Escargot.php | 100 ++++++----------------- src/Queue/DoctrineQueue.php | 6 +- src/Queue/InMemoryQueue.php | 4 +- src/Queue/LazyQueue.php | 25 ++---- src/Subscriber/HtmlCrawlerSubscriber.php | 6 +- src/Subscriber/RobotsSubscriber.php | 14 ++-- src/SubscriberLogger.php | 18 ++-- tests/EscargotTest.php | 10 +-- tests/Queue/DoctrineQueueTest.php | 5 +- tests/Queue/LazyQueueTest.php | 5 +- tests/Scenario/MockResponseFactory.php | 2 +- tests/Scenario/Scenario.php | 44 ++-------- 14 files changed, 83 insertions(+), 208 deletions(-) diff --git a/src/BaseUriCollection.php b/src/BaseUriCollection.php index 5617d4d..5b323a3 100644 --- a/src/BaseUriCollection.php +++ b/src/BaseUriCollection.php @@ -19,7 +19,7 @@ final class BaseUriCollection implements \IteratorAggregate, \Countable /** * @var array */ - private $baseUris = []; + private array $baseUris = []; /** * @param array $baseUris diff --git a/src/CrawlUri.php b/src/CrawlUri.php index b1bb38a..a75af57 100644 --- a/src/CrawlUri.php +++ b/src/CrawlUri.php @@ -14,43 +14,23 @@ use Psr\Http\Message\UriInterface; -final class CrawlUri +final class CrawlUri implements \Stringable { - /** - * @var UriInterface - */ - private $uri; - - /** - * @var int - */ - private $level; - - /** - * @var bool - */ - private $processed = false; - - /** - * @var bool - */ - private $wasMarkedProcessed = false; - - /** - * @var UriInterface|null - */ - private $foundOn; - - /** - * @var array - */ - private $tags = []; - - public function __construct(UriInterface $uri, int $level, bool $processed = false, UriInterface|null $foundOn = null) - { + private readonly UriInterface $uri; + + private bool $wasMarkedProcessed = false; + + private UriInterface|null $foundOn = null; + + private array $tags = []; + + public function __construct( + UriInterface $uri, + private readonly int $level, + private bool $processed = false, + UriInterface|null $foundOn = null, + ) { $this->uri = self::normalizeUri($uri); - $this->level = $level; - $this->processed = $processed; if (null !== $foundOn) { $this->foundOn = self::normalizeUri($foundOn); diff --git a/src/Escargot.php b/src/Escargot.php index a08bc27..439e1d1 100644 --- a/src/Escargot.php +++ b/src/Escargot.php @@ -37,118 +37,72 @@ final class Escargot { private const DEFAULT_USER_AGENT = 'terminal42/escargot'; - /** - * @var QueueInterface - */ - private $queue; - - /** - * @var ClockInterface - */ - private $clock; + private ClockInterface $clock; - /** - * @var string - */ - private $jobId; + private HttpClientInterface|null $client = null; - /** - * @var BaseUriCollection - */ - private $baseUris; - - /** - * @var HttpClientInterface|null - */ - private $client; - - /** - * @var LoggerInterface|null - */ - private $logger; + private LoggerInterface|null $logger = null; /** * @var array */ - private $subscribers = []; + private array $subscribers = []; - /** - * @var string - */ - private $userAgent; + private string $userAgent; /** * Maximum number of requests * Escargot is going to * execute. * 0 means no limit. - * - * @var int */ - private $maxRequests = 0; + private int $maxRequests = 0; /** * Maximum number of duration in seconds * Escargot is going to work on requests. * * 0 means no limit. - * - * @var int */ - private $maxDurationInSeconds = 0; + private int $maxDurationInSeconds = 0; /** * Request delay in microseconds. * 0 means no delay. - * - * @var int */ - private $requestDelay = 0; + private int $requestDelay = 0; /** * Maximum concurrent requests * that are being sent. - * - * @var int */ - private $concurrency = 10; + private int $concurrency = 10; /** * Maximum depth Escargot * is going to crawl. * 0 means no limit. - * - * @var int */ - private $maxDepth = 0; + private int $maxDepth = 0; - /** - * @var int - */ - private $requestsSent = 0; + private int $requestsSent = 0; - /** - * @var array - */ - private $runningRequests = []; + private array $runningRequests = []; /** * Keeps track of all the decisions * for all the subscribers for * every CrawlUri instance. - * - * @var array */ - private $decisionMap = ['shouldRequest' => [], 'needsContent' => []]; + private array $decisionMap = ['shouldRequest' => [], 'needsContent' => []]; private \DateTimeImmutable $startTime; - private function __construct(QueueInterface $queue, string $jobId, BaseUriCollection $baseUris) - { - $this->queue = $queue; - $this->jobId = $jobId; - $this->baseUris = $baseUris; - + private function __construct( + private readonly QueueInterface $queue, + private readonly string $jobId, + private readonly BaseUriCollection $baseUris, + ) { $this->clock = new NativeClock(); $this->userAgent = self::DEFAULT_USER_AGENT; } @@ -567,6 +521,7 @@ private function processResponseChunk(ResponseInterface $response, ChunkInterfac */ private function prepareResponses(): array { + $response = null; $responses = []; $hasMaxRequestsReached = $this->isMaxRequestsReached(); @@ -622,7 +577,7 @@ private function prepareResponses(): array // Request delay if (0 !== $this->requestDelay) { - $this->clock->sleep($this->requestDelay / 1000000); + $this->clock->sleep($this->requestDelay / 1_000_000); } try { @@ -698,16 +653,11 @@ private function handleException(ExceptionInterface $exception, CrawlUri $crawlU continue; } - switch (true) { - case $exception instanceof TransportExceptionInterface: - $subscriber->onTransportException($crawlUri, $exception, $response); - break; - case $exception instanceof HttpExceptionInterface: - $subscriber->onHttpException($crawlUri, $exception, $response, $chunk); - break; - default: - throw new \RuntimeException('Unknown exception type!'); - } + match (true) { + $exception instanceof TransportExceptionInterface => $subscriber->onTransportException($crawlUri, $exception, $response), + $exception instanceof HttpExceptionInterface => $subscriber->onHttpException($crawlUri, $exception, $response, $chunk), + default => throw new \RuntimeException('Unknown exception type!'), + }; } } diff --git a/src/Queue/DoctrineQueue.php b/src/Queue/DoctrineQueue.php index b8c6304..a685015 100644 --- a/src/Queue/DoctrineQueue.php +++ b/src/Queue/DoctrineQueue.php @@ -23,9 +23,9 @@ final class DoctrineQueue implements QueueInterface { public function __construct( - private Connection $connection, - private \Closure $jobIdGenerator, - private string $tableName = 'escargot', + private readonly Connection $connection, + private readonly \Closure $jobIdGenerator, + private readonly string $tableName = 'escargot', ) { } diff --git a/src/Queue/InMemoryQueue.php b/src/Queue/InMemoryQueue.php index ec7a1cc..203f3b8 100644 --- a/src/Queue/InMemoryQueue.php +++ b/src/Queue/InMemoryQueue.php @@ -21,12 +21,12 @@ final class InMemoryQueue implements QueueInterface /** * @var array> */ - private $baseUris = []; + private array $baseUris = []; /** * @var array> */ - private $queue = []; + private array $queue = []; public function createJobId(BaseUriCollection $baseUris): string { diff --git a/src/Queue/LazyQueue.php b/src/Queue/LazyQueue.php index 17b9552..ef94d2c 100644 --- a/src/Queue/LazyQueue.php +++ b/src/Queue/LazyQueue.php @@ -18,30 +18,17 @@ final class LazyQueue implements QueueInterface { - /** - * @var QueueInterface - */ - private $primaryQueue; - - /** - * @var QueueInterface - */ - private $secondaryQueue; - /** * @var array */ - private $jobIdMapper = []; + private array $jobIdMapper = []; - /** - * @var int - */ - private $toSkip = 0; + private int $toSkip = 0; - public function __construct(QueueInterface $primaryQueue, QueueInterface $secondaryQueue) - { - $this->primaryQueue = $primaryQueue; - $this->secondaryQueue = $secondaryQueue; + public function __construct( + private readonly QueueInterface $primaryQueue, + private readonly QueueInterface $secondaryQueue, + ) { } public function createJobId(BaseUriCollection $baseUris): string diff --git a/src/Subscriber/HtmlCrawlerSubscriber.php b/src/Subscriber/HtmlCrawlerSubscriber.php index b602c69..f919f19 100644 --- a/src/Subscriber/HtmlCrawlerSubscriber.php +++ b/src/Subscriber/HtmlCrawlerSubscriber.php @@ -84,7 +84,7 @@ private function addNewUriToQueueFromNode(CrawlUri $crawlUri, string $uri, \DOME try { $uri = HttpUriFactory::create($uri); - } catch (\InvalidArgumentException $e) { + } catch (\InvalidArgumentException) { $this->logWithCrawlUri( $crawlUri, LogLevel::DEBUG, @@ -119,8 +119,8 @@ private function addNewUriToQueueFromNode(CrawlUri $crawlUri, string $uri, \DOME // Add all data attributes as tags for e.g. other subscribers if ($node->hasAttributes()) { foreach ($node->attributes as $attribute) { - if (str_starts_with($attribute->name, 'data-')) { - $newCrawlUri->addTag(substr($attribute->name, 5)); + if (str_starts_with((string) $attribute->name, 'data-')) { + $newCrawlUri->addTag(substr((string) $attribute->name, 5)); } } } diff --git a/src/Subscriber/RobotsSubscriber.php b/src/Subscriber/RobotsSubscriber.php index 6e1ff44..4a8bf81 100644 --- a/src/Subscriber/RobotsSubscriber.php +++ b/src/Subscriber/RobotsSubscriber.php @@ -47,7 +47,7 @@ final class RobotsSubscriber implements SubscriberInterface, EscargotAwareInterf /** * @var array */ - private $robotsTxtCache = []; + private array $robotsTxtCache = []; public function shouldRequest(CrawlUri $crawlUri): string { @@ -176,7 +176,7 @@ private function getRobotsTxtFile(CrawlUri $crawlUri): File|null try { $robotsTxtContent = $response->getContent(); - } catch (HttpExceptionInterface $e) { + } catch (HttpExceptionInterface) { return $this->robotsTxtCache[(string) $robotsTxtUri] = null; } @@ -184,7 +184,7 @@ private function getRobotsTxtFile(CrawlUri $crawlUri): File|null $parser->setSource($robotsTxtContent); return $this->robotsTxtCache[(string) $robotsTxtUri] = $parser->getFile(); - } catch (TransportExceptionInterface $exception) { + } catch (TransportExceptionInterface) { return $this->robotsTxtCache[(string) $robotsTxtUri] = null; } } @@ -209,7 +209,7 @@ private function handleSitemap(CrawlUri $crawlUri, File $robotsTxt): void foreach ($robotsTxt->getNonGroupDirectives()->getByField('sitemap')->getDirectives() as $directive) { try { $sitemapUri = HttpUriFactory::create($directive->getValue()->get()); - } catch (\InvalidArgumentException $e) { + } catch (\InvalidArgumentException) { $this->logWithCrawlUri( $crawlUri, LogLevel::DEBUG, @@ -239,14 +239,14 @@ private function extractUrisFromSitemap(CrawlUri $sitemapUri, string $content): } set_error_handler( - static function ($errno, $errstr): void { + static function ($errno, $errstr): never { throw new \Exception($errstr, $errno); }, ); try { $urls = new \SimpleXMLElement($content); - } catch (\Exception $exception) { + } catch (\Exception) { return; } finally { restore_error_handler(); @@ -258,7 +258,7 @@ static function ($errno, $errstr): void { // Add it to the queue if not present already try { $uri = HttpUriFactory::create((string) $url->loc); - } catch (\InvalidArgumentException $e) { + } catch (\InvalidArgumentException) { $this->logWithCrawlUri( $sitemapUri, LogLevel::DEBUG, diff --git a/src/SubscriberLogger.php b/src/SubscriberLogger.php index 5c6e98e..94d656f 100644 --- a/src/SubscriberLogger.php +++ b/src/SubscriberLogger.php @@ -17,25 +17,17 @@ class SubscriberLogger extends AbstractLogger { - /** - * @var LoggerInterface - */ - private $decorated; + private string|null $subscriberClass = null; - /** - * @var string - */ - private $subscriberClass; - - public function __construct(LoggerInterface $decorated, string $subscriberClass) - { + public function __construct( + private readonly LoggerInterface $decorated, + string $subscriberClass, + ) { // Anonymous class names contain null bytes so let's standardize them a little if (str_contains($subscriberClass, '@anonymous')) { $subscriberClass = 'class@anonymous:'.basename($subscriberClass); $subscriberClass = preg_replace('/\.php(.+)$/', '', $subscriberClass); } - - $this->decorated = $decorated; $this->subscriberClass = $subscriberClass; } diff --git a/tests/EscargotTest.php b/tests/EscargotTest.php index b541385..64c9c80 100644 --- a/tests/EscargotTest.php +++ b/tests/EscargotTest.php @@ -79,10 +79,8 @@ public function testWithers(): void ->expects($this->once()) ->method('setLogger') ->with($this->callback( - static function (LoggerInterface $logger) { - // Must be decorated - return $logger instanceof SubscriberLogger; - }, + // Must be decorated + static fn (LoggerInterface $logger) => $logger instanceof SubscriberLogger, )) ; @@ -249,7 +247,7 @@ public function testCrawlAsWebCrawler(\Closure $responseFactory, array $expected $escargot = Escargot::create($baseUris, $queue); $escargot = $escargot->withHttpClient(new MockHttpClient($responseFactory)); - if (0 !== \count($options)) { + if (0 !== (is_countable($options) ? \count($options) : 0)) { if (\array_key_exists('max_requests', $options)) { $escargot = $escargot->withMaxRequests((int) $options['max_requests']); } @@ -324,7 +322,7 @@ private function getSearchIndexSubscriber(): SubscriberInterface use LoggerAwareTrait; use SubscriberLoggerTrait; - private $uris = []; + private array $uris = []; public function getUris(): array { diff --git a/tests/Queue/DoctrineQueueTest.php b/tests/Queue/DoctrineQueueTest.php index a8b7f74..aca9a16 100644 --- a/tests/Queue/DoctrineQueueTest.php +++ b/tests/Queue/DoctrineQueueTest.php @@ -22,10 +22,7 @@ class DoctrineQueueTest extends AbstractQueueTest { - /** - * @var DoctrineQueue - */ - private $queue; + private DoctrineQueue $queue; protected function setUp(): void { diff --git a/tests/Queue/LazyQueueTest.php b/tests/Queue/LazyQueueTest.php index 2bd3937..5c97128 100644 --- a/tests/Queue/LazyQueueTest.php +++ b/tests/Queue/LazyQueueTest.php @@ -23,10 +23,7 @@ class LazyQueueTest extends AbstractQueueTest { - /** - * @var DoctrineQueue - */ - private $queue; + private DoctrineQueue $queue; protected function setUp(): void { diff --git a/tests/Scenario/MockResponseFactory.php b/tests/Scenario/MockResponseFactory.php index 77f06ab..91e2ef1 100644 --- a/tests/Scenario/MockResponseFactory.php +++ b/tests/Scenario/MockResponseFactory.php @@ -35,7 +35,7 @@ public static function createFromString(string $contents): MockResponse } if (isset($mappedHeaders['x-escargottest-info'])) { - $info = array_merge($info, json_decode($mappedHeaders['x-escargottest-info'][0], true)); + $info = array_merge($info, json_decode($mappedHeaders['x-escargottest-info'][0], true, 512, JSON_THROW_ON_ERROR)); unset($mappedHeaders['x-escargottest-info']); } diff --git a/tests/Scenario/Scenario.php b/tests/Scenario/Scenario.php index 85b371b..c3ec5a6 100644 --- a/tests/Scenario/Scenario.php +++ b/tests/Scenario/Scenario.php @@ -16,49 +16,23 @@ class Scenario { - /** - * @var string - */ - private $name; - - /** - * @var string - */ - private $path; + private array|null $responses = null; - /** - * @var array - */ - private $responses; - - /** - * @var array - */ - private $requests = []; + private array $requests = []; - /** - * @var array - */ - private $logs = []; + private array $logs = []; - /** - * @var array - */ - private $options = []; + private array $options = []; - /** - * @var string - */ - private $description = 'No scenario description given'; + private string $description = 'No scenario description given'; /** * Scenario constructor. */ - public function __construct(string $name, string $path) - { - $this->name = $name; - $this->path = $path; - + public function __construct( + private readonly string $name, + private readonly string $path, + ) { $this->build(); }