Skip to content

Commit

Permalink
Updated to PHP 8 features using Rector <3
Browse files Browse the repository at this point in the history
  • Loading branch information
Toflar committed Nov 9, 2023
1 parent 2b7b625 commit 79f35e8
Show file tree
Hide file tree
Showing 14 changed files with 83 additions and 208 deletions.
2 changes: 1 addition & 1 deletion src/BaseUriCollection.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ final class BaseUriCollection implements \IteratorAggregate, \Countable
/**
* @var array<UriInterface>
*/
private $baseUris = [];
private array $baseUris = [];

/**
* @param array<UriInterface> $baseUris
Expand Down
50 changes: 15 additions & 35 deletions src/CrawlUri.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,43 +14,23 @@

use Psr\Http\Message\UriInterface;

final class CrawlUri
final class CrawlUri implements \Stringable
{
/**
* @var UriInterface
*/
private $uri;

/**
* @var int
*/
private $level;

/**
* @var bool
*/
private $processed = false;

/**
* @var bool
*/
private $wasMarkedProcessed = false;

/**
* @var UriInterface|null
*/
private $foundOn;

/**
* @var array
*/
private $tags = [];

public function __construct(UriInterface $uri, int $level, bool $processed = false, UriInterface|null $foundOn = null)
{
private readonly UriInterface $uri;

private bool $wasMarkedProcessed = false;

private UriInterface|null $foundOn = null;

private array $tags = [];

public function __construct(
UriInterface $uri,
private readonly int $level,
private bool $processed = false,
UriInterface|null $foundOn = null,
) {
$this->uri = self::normalizeUri($uri);
$this->level = $level;
$this->processed = $processed;

if (null !== $foundOn) {
$this->foundOn = self::normalizeUri($foundOn);
Expand Down
100 changes: 25 additions & 75 deletions src/Escargot.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,118 +37,72 @@ final class Escargot
{
private const DEFAULT_USER_AGENT = 'terminal42/escargot';

/**
* @var QueueInterface
*/
private $queue;

/**
* @var ClockInterface
*/
private $clock;
private ClockInterface $clock;

/**
* @var string
*/
private $jobId;
private HttpClientInterface|null $client = null;

/**
* @var BaseUriCollection
*/
private $baseUris;

/**
* @var HttpClientInterface|null
*/
private $client;

/**
* @var LoggerInterface|null
*/
private $logger;
private LoggerInterface|null $logger = null;

/**
* @var array<SubscriberInterface>
*/
private $subscribers = [];
private array $subscribers = [];

/**
* @var string
*/
private $userAgent;
private string $userAgent;

/**
* Maximum number of requests
* Escargot is going to
* execute.
* 0 means no limit.
*
* @var int
*/
private $maxRequests = 0;
private int $maxRequests = 0;

/**
* Maximum duration in seconds
* Escargot is going to work on requests.
*
* 0 means no limit.
*
* @var int
*/
private $maxDurationInSeconds = 0;
private int $maxDurationInSeconds = 0;

/**
* Request delay in microseconds.
* 0 means no delay.
*
* @var int
*/
private $requestDelay = 0;
private int $requestDelay = 0;

/**
* Maximum concurrent requests
* that are being sent.
*
* @var int
*/
private $concurrency = 10;
private int $concurrency = 10;

/**
* Maximum depth Escargot
* is going to crawl.
* 0 means no limit.
*
* @var int
*/
private $maxDepth = 0;
private int $maxDepth = 0;

/**
* @var int
*/
private $requestsSent = 0;
private int $requestsSent = 0;

/**
* @var array
*/
private $runningRequests = [];
private array $runningRequests = [];

/**
* Keeps track of all the decisions
* for all the subscribers for
* every CrawlUri instance.
*
* @var array
*/
private $decisionMap = ['shouldRequest' => [], 'needsContent' => []];
private array $decisionMap = ['shouldRequest' => [], 'needsContent' => []];

private \DateTimeImmutable $startTime;

private function __construct(QueueInterface $queue, string $jobId, BaseUriCollection $baseUris)
{
$this->queue = $queue;
$this->jobId = $jobId;
$this->baseUris = $baseUris;

private function __construct(
private readonly QueueInterface $queue,
private readonly string $jobId,
private readonly BaseUriCollection $baseUris,
) {
$this->clock = new NativeClock();
$this->userAgent = self::DEFAULT_USER_AGENT;
}
Expand Down Expand Up @@ -567,6 +521,7 @@ private function processResponseChunk(ResponseInterface $response, ChunkInterfac
*/
private function prepareResponses(): array
{
$response = null;
$responses = [];

$hasMaxRequestsReached = $this->isMaxRequestsReached();
Expand Down Expand Up @@ -622,7 +577,7 @@ private function prepareResponses(): array

// Request delay
if (0 !== $this->requestDelay) {
$this->clock->sleep($this->requestDelay / 1000000);
$this->clock->sleep($this->requestDelay / 1_000_000);
}

try {
Expand Down Expand Up @@ -698,16 +653,11 @@ private function handleException(ExceptionInterface $exception, CrawlUri $crawlU
continue;
}

switch (true) {
case $exception instanceof TransportExceptionInterface:
$subscriber->onTransportException($crawlUri, $exception, $response);
break;
case $exception instanceof HttpExceptionInterface:
$subscriber->onHttpException($crawlUri, $exception, $response, $chunk);
break;
default:
throw new \RuntimeException('Unknown exception type!');
}
match (true) {
$exception instanceof TransportExceptionInterface => $subscriber->onTransportException($crawlUri, $exception, $response),
$exception instanceof HttpExceptionInterface => $subscriber->onHttpException($crawlUri, $exception, $response, $chunk),
default => throw new \RuntimeException('Unknown exception type!'),
};
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/Queue/DoctrineQueue.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
final class DoctrineQueue implements QueueInterface
{
public function __construct(
private Connection $connection,
private \Closure $jobIdGenerator,
private string $tableName = 'escargot',
private readonly Connection $connection,
private readonly \Closure $jobIdGenerator,
private readonly string $tableName = 'escargot',
) {
}

Expand Down
4 changes: 2 additions & 2 deletions src/Queue/InMemoryQueue.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ final class InMemoryQueue implements QueueInterface
/**
* @var array<string, array<UriInterface>>
*/
private $baseUris = [];
private array $baseUris = [];

/**
* @var array<string, array<string, CrawlUri>>
*/
private $queue = [];
private array $queue = [];

public function createJobId(BaseUriCollection $baseUris): string
{
Expand Down
25 changes: 6 additions & 19 deletions src/Queue/LazyQueue.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,17 @@

final class LazyQueue implements QueueInterface
{
/**
* @var QueueInterface
*/
private $primaryQueue;

/**
* @var QueueInterface
*/
private $secondaryQueue;

/**
* @var array<string, string>
*/
private $jobIdMapper = [];
private array $jobIdMapper = [];

/**
* @var int
*/
private $toSkip = 0;
private int $toSkip = 0;

public function __construct(QueueInterface $primaryQueue, QueueInterface $secondaryQueue)
{
$this->primaryQueue = $primaryQueue;
$this->secondaryQueue = $secondaryQueue;
public function __construct(
private readonly QueueInterface $primaryQueue,
private readonly QueueInterface $secondaryQueue,
) {
}

public function createJobId(BaseUriCollection $baseUris): string
Expand Down
6 changes: 3 additions & 3 deletions src/Subscriber/HtmlCrawlerSubscriber.php
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ private function addNewUriToQueueFromNode(CrawlUri $crawlUri, string $uri, \DOME

try {
$uri = HttpUriFactory::create($uri);
} catch (\InvalidArgumentException $e) {
} catch (\InvalidArgumentException) {
$this->logWithCrawlUri(
$crawlUri,
LogLevel::DEBUG,
Expand Down Expand Up @@ -119,8 +119,8 @@ private function addNewUriToQueueFromNode(CrawlUri $crawlUri, string $uri, \DOME
// Add all data attributes as tags for e.g. other subscribers
if ($node->hasAttributes()) {
foreach ($node->attributes as $attribute) {
if (str_starts_with($attribute->name, 'data-')) {
$newCrawlUri->addTag(substr($attribute->name, 5));
if (str_starts_with((string) $attribute->name, 'data-')) {
$newCrawlUri->addTag(substr((string) $attribute->name, 5));
}
}
}
Expand Down
14 changes: 7 additions & 7 deletions src/Subscriber/RobotsSubscriber.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ final class RobotsSubscriber implements SubscriberInterface, EscargotAwareInterf
/**
* @var array<string, File>
*/
private $robotsTxtCache = [];
private array $robotsTxtCache = [];

public function shouldRequest(CrawlUri $crawlUri): string
{
Expand Down Expand Up @@ -176,15 +176,15 @@ private function getRobotsTxtFile(CrawlUri $crawlUri): File|null

try {
$robotsTxtContent = $response->getContent();
} catch (HttpExceptionInterface $e) {
} catch (HttpExceptionInterface) {
return $this->robotsTxtCache[(string) $robotsTxtUri] = null;
}

$parser = new Parser();
$parser->setSource($robotsTxtContent);

return $this->robotsTxtCache[(string) $robotsTxtUri] = $parser->getFile();
} catch (TransportExceptionInterface $exception) {
} catch (TransportExceptionInterface) {
return $this->robotsTxtCache[(string) $robotsTxtUri] = null;
}
}
Expand All @@ -209,7 +209,7 @@ private function handleSitemap(CrawlUri $crawlUri, File $robotsTxt): void
foreach ($robotsTxt->getNonGroupDirectives()->getByField('sitemap')->getDirectives() as $directive) {
try {
$sitemapUri = HttpUriFactory::create($directive->getValue()->get());
} catch (\InvalidArgumentException $e) {
} catch (\InvalidArgumentException) {
$this->logWithCrawlUri(
$crawlUri,
LogLevel::DEBUG,
Expand Down Expand Up @@ -239,14 +239,14 @@ private function extractUrisFromSitemap(CrawlUri $sitemapUri, string $content):
}

set_error_handler(
static function ($errno, $errstr): void {
static function ($errno, $errstr): never {
throw new \Exception($errstr, $errno);
},
);

try {
$urls = new \SimpleXMLElement($content);
} catch (\Exception $exception) {
} catch (\Exception) {
return;
} finally {
restore_error_handler();
Expand All @@ -258,7 +258,7 @@ static function ($errno, $errstr): void {
// Add it to the queue if not present already
try {
$uri = HttpUriFactory::create((string) $url->loc);
} catch (\InvalidArgumentException $e) {
} catch (\InvalidArgumentException) {
$this->logWithCrawlUri(
$sitemapUri,
LogLevel::DEBUG,
Expand Down
Loading

0 comments on commit 79f35e8

Please sign in to comment.