Skip to content

Commit

Permalink
Merge pull request #1744 from hydephp/even-smarter-sitemap-generation
Browse files Browse the repository at this point in the history
[2.x] Even smarter sitemap generation
  • Loading branch information
caendesilva authored Jun 28, 2024
2 parents 5811af9 + b8d30ba commit aebf524
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 53 deletions.
2 changes: 2 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ This serves two purposes:
- Minor: Data collection files are now validated for syntax errors during discovery in https://github.com/hydephp/develop/pull/1732
- Minor: Methods in the `Includes` facade now return `HtmlString` objects instead of `string` in https://github.com/hydephp/develop/pull/1738. For more information, see below.
- Minor: `Includes::path()` and `Includes::get()` methods now normalize paths to be basenames to match the behaviour of the other include methods in https://github.com/hydephp/develop/pull/1738. This means that nested directories are no longer supported, as you should use a data collection for that.
- Minor: The `processing_time_ms` attribute in the `sitemap.xml` file has now been removed in https://github.com/hydephp/develop/pull/1744
- Improved the sitemap data generation to be smarter and more dynamic in https://github.com/hydephp/develop/pull/1744
- The `hasFeature` method on the Hyde facade and HydeKernel now only accepts a Feature enum value instead of a string for its parameter.
- Changed how the documentation search is generated, to be an `InMemoryPage` instead of a post-build task.
- Media asset files are now copied using the new build task instead of the deprecated `BuildService::transferMediaAssets()` method.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,25 @@

use Hyde\Hyde;
use SimpleXMLElement;
use Hyde\Facades\Config;
use Hyde\Pages\HtmlPage;
use Hyde\Pages\BladePage;
use Hyde\Pages\MarkdownPage;
use Hyde\Pages\MarkdownPost;
use Hyde\Facades\Filesystem;
use Hyde\Pages\InMemoryPage;
use Hyde\Support\Models\Route;
use Illuminate\Support\Carbon;
use Hyde\Pages\DocumentationPage;
use Hyde\Foundation\Facades\Routes;
use Hyde\Framework\Concerns\TracksExecutionTime;

use function blank;
use function filemtime;
use function in_array;
use function date;
use function time;
use function str_starts_with;

/**
* @see https://www.sitemaps.org/protocol.html
*/
class SitemapGenerator extends BaseXmlGenerator
{
use TracksExecutionTime;

public function generate(): static
{
Routes::all()->each(function (Route $route): void {
Expand All @@ -40,17 +36,8 @@ public function generate(): static
return $this;
}

public function getXml(): string
{
$this->xmlElement->addAttribute('processing_time_ms', $this->getFormattedProcessingTime());

return parent::getXml();
}

protected function constructBaseElement(): void
{
$this->startClock();

$this->xmlElement = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="https://www.sitemaps.org/schemas/sitemap/0.9"></urlset>');
$this->xmlElement->addAttribute('generator', 'HydePHP '.Hyde::version());
}
Expand All @@ -61,62 +48,69 @@ protected function addRoute(Route $route): void

$this->addChild($urlItem, 'loc', $this->resolveRouteLink($route));
$this->addChild($urlItem, 'lastmod', $this->getLastModDate($route->getSourcePath()));
$this->addChild($urlItem, 'changefreq', 'daily');
$this->addChild($urlItem, 'changefreq', $this->generateChangeFrequency(...$this->getRouteInformation($route)));
$this->addChild($urlItem, 'priority', $this->generatePriority(...$this->getRouteInformation($route)));
}

if (Config::getBool('hyde.sitemap.dynamic_priority', true)) {
$this->addChild($urlItem, 'priority', $this->getPriority(
$route->getPageClass(), $route->getPage()->getIdentifier()
));
}
/**
* Resolve the link written to the route's `loc` element.
*
* The route's output path is passed to `Hyde::url()` and the result is returned as-is.
*/
protected function resolveRouteLink(Route $route): string
{
return Hyde::url($route->getOutputPath());
}

protected function getLastModDate(string $file): string
{
return date('c', @filemtime($file) ?: time());
return date('c', @Filesystem::lastModified($file) ?: Carbon::now()->timestamp);
}

protected function getPriority(string $pageClass, string $slug): string
/**
* @param class-string<\Hyde\Pages\Concerns\HydePage> $pageClass
* @return numeric-string
*/
protected function generatePriority(string $pageClass, string $identifier): string
{
$priority = 0.5;

if (in_array($pageClass, [BladePage::class, MarkdownPage::class])) {
if (in_array($pageClass, [BladePage::class, MarkdownPage::class, DocumentationPage::class])) {
$priority = 0.9;
if ($slug === 'index') {

if ($identifier === 'index') {
$priority = 1;
}
if ($slug === '404') {
$priority = 0.5;
}
}

if ($pageClass === DocumentationPage::class) {
$priority = 0.9;
if (in_array($pageClass, [MarkdownPost::class, InMemoryPage::class, HtmlPage::class])) {
$priority = 0.75;
}

if ($pageClass === MarkdownPost::class) {
$priority = 0.75;
if ($identifier === '404') {
$priority = 0.25;
}

return (string) $priority;
}

/** @return numeric-string */
protected function getFormattedProcessingTime(): string
{
return (string) $this->getExecutionTimeInMs();
}

protected function resolveRouteLink(Route $route): string
/**
* @param class-string<\Hyde\Pages\Concerns\HydePage> $pageClass
* @return 'always'|'hourly'|'daily'|'weekly'|'monthly'|'yearly'|'never'
*/
protected function generateChangeFrequency(string $pageClass, string $identifier): string
{
$baseUrl = Config::getNullableString('hyde.url');
$frequency = 'weekly';

if (blank($baseUrl) || str_starts_with($baseUrl, 'http://localhost')) {
// While the sitemap spec requires a full URL, we rather fall back
// to using relative links instead of using localhost links.
if (in_array($pageClass, [BladePage::class, MarkdownPage::class, DocumentationPage::class])) {
$frequency = 'daily';
}

return $route->getLink();
} else {
return Hyde::url($route->getOutputPath());
if ($identifier === '404') {
$frequency = 'monthly';
}

return $frequency;
}

/**
* Collect the page class and the page identifier of the route, in that order.
*
* The result is spread into `generateChangeFrequency()` and `generatePriority()`,
* so the element order must match their `($pageClass, $identifier)` parameters.
*
* @return array{class-string<\Hyde\Pages\Concerns\HydePage>, string}
*/
protected function getRouteInformation(Route $route): array
{
return [$route->getPageClass(), $route->getPage()->getIdentifier()];
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

namespace Hyde\Framework\Testing\Feature\Commands;

use Hyde\Facades\Filesystem;
use Hyde\Hyde;
use Hyde\Testing\TestCase;

Expand All @@ -16,14 +15,33 @@ class BuildSitemapCommandTest extends TestCase
{
public function testSitemapIsGeneratedWhenConditionsAreMet()
{
$this->withSiteUrl();
config(['hyde.generate_sitemap' => true]);
config(['hyde.url' => 'https://example.com']);

$this->cleanUpWhenDone('_site/sitemap.xml');

$this->assertFileDoesNotExist(Hyde::path('_site/sitemap.xml'));

$this->artisan('build:sitemap')->assertExitCode(0);
$this->artisan('build:sitemap')
->expectsOutputToContain('Generating sitemap...')
->doesntExpectOutputToContain('Skipped')
->expectsOutputToContain(' > Created _site/sitemap.xml')
->assertExitCode(0);

$this->assertFileExists(Hyde::path('_site/sitemap.xml'));
}

public function testSitemapIsNotGeneratedWhenConditionsAreNotMet()
{
config(['hyde.url' => '']);

Filesystem::unlink('_site/sitemap.xml');
$this->assertFileDoesNotExist(Hyde::path('_site/sitemap.xml'));

$this->artisan('build:sitemap')
->expectsOutputToContain('Generating sitemap...')
->expectsOutputToContain('Skipped')
->expectsOutput(' > Cannot generate sitemap without a valid base URL')
->assertExitCode(0);

$this->assertFileDoesNotExist(Hyde::path('_site/sitemap.xml'));
}
}
157 changes: 157 additions & 0 deletions packages/framework/tests/Feature/SitemapFeatureTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
<?php

declare(strict_types=1);

namespace Hyde\Framework\Testing\Feature;

use Mockery;
use Hyde\Hyde;
use Hyde\Testing\TestCase;
use Illuminate\Support\Carbon;
use Illuminate\Support\Facades\File;
use Illuminate\Filesystem\Filesystem;

/**
 * High level test of the sitemap generation feature.
 *
 * It contains a setup that covers all code paths, proving 100% coverage in actual usage.
 *
 * @see \Hyde\Framework\Testing\Feature\Services\SitemapServiceTest
 * @see \Hyde\Framework\Testing\Feature\Commands\BuildSitemapCommandTest
 *
 * @covers \Hyde\Framework\Features\XmlGenerators\SitemapGenerator
 * @covers \Hyde\Framework\Actions\PostBuildTasks\GenerateSitemap
 * @covers \Hyde\Console\Commands\BuildSitemapCommand
 */
class SitemapFeatureTest extends TestCase
{
    /**
     * Builds the sitemap for a site containing several kinds of pages and
     * compares the generated file against the full expected XML document.
     */
    public function testTheSitemapFeature(): void
    {
        // Freeze the clock and stub the filesystem's modification time so that
        // every <lastmod> value in the generated sitemap is deterministic.
        Carbon::setTestNow('2024-01-01 12:00:00');
        $filesystem = Mockery::mock(Filesystem::class)->makePartial();
        $filesystem->shouldReceive('lastModified')->andReturn(Carbon::now()->timestamp);
        File::swap($filesystem);

        $this->cleanUpWhenDone('_site/sitemap.xml');
        $this->setUpBroadSiteStructure();
        $this->withSiteUrl();

        $this->artisan('build:sitemap')
            ->expectsOutputToContain('Created _site/sitemap.xml')
            ->assertExitCode(0);

        // Resolve the path through the Hyde facade (as BuildSitemapCommandTest does)
        // instead of relying on the current working directory of the test process.
        $sitemapPath = Hyde::path('_site/sitemap.xml');

        $this->assertFileExists($sitemapPath);

        $this->assertSameXml(
            '<?xml version="1.0" encoding="UTF-8"?>'."\n{$this->stripFormatting($this->expected(Hyde::version()))}\n",
            file_get_contents($sitemapPath)
        );
    }

    /**
     * Create the source files for the test site: pages, posts, and documentation
     * pages, including `index` and `404` identifiers.
     */
    protected function setUpBroadSiteStructure(): void
    {
        $this->file('_pages/about.md', "# About\n\nThis is the about page.");
        $this->file('_pages/contact.html', '<h1>Contact</h1><p>This is the contact page.</p>');
        $this->file('_posts/hello-world.md', "# Hello, World!\n\nThis is the first post.");
        $this->file('_posts/second-post.md', "# Second Post\n\nThis is the second post.");
        $this->file('_docs/index.md', "# Documentation\n\nThis is the documentation index.");
        $this->file('_docs/installation.md', "# Installation\n\nThis is the installation guide.");
        $this->file('_docs/usage.md', "# Usage\n\nThis is the usage guide.");
        $this->file('_docs/404.md', "# 404\n\nThis is the 404 page.");
    }

    /**
     * The expected `<urlset>` document, formatted for readability.
     *
     * @param  string  $version  The version string interpolated into the `generator` attribute.
     */
    protected function expected(string $version): string
    {
        return <<<XML
        <urlset xmlns="https://www.sitemaps.org/schemas/sitemap/0.9" generator="HydePHP $version">
            <url>
                <loc>https://example.com/contact.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>weekly</changefreq>
                <priority>0.75</priority>
            </url>
            <url>
                <loc>https://example.com/404.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>monthly</changefreq>
                <priority>0.25</priority>
            </url>
            <url>
                <loc>https://example.com/index.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>daily</changefreq>
                <priority>1</priority>
            </url>
            <url>
                <loc>https://example.com/about.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>daily</changefreq>
                <priority>0.9</priority>
            </url>
            <url>
                <loc>https://example.com/posts/hello-world.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>weekly</changefreq>
                <priority>0.75</priority>
            </url>
            <url>
                <loc>https://example.com/posts/second-post.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>weekly</changefreq>
                <priority>0.75</priority>
            </url>
            <url>
                <loc>https://example.com/docs/404.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>monthly</changefreq>
                <priority>0.25</priority>
            </url>
            <url>
                <loc>https://example.com/docs/index.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>daily</changefreq>
                <priority>1</priority>
            </url>
            <url>
                <loc>https://example.com/docs/installation.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>daily</changefreq>
                <priority>0.9</priority>
            </url>
            <url>
                <loc>https://example.com/docs/usage.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>daily</changefreq>
                <priority>0.9</priority>
            </url>
            <url>
                <loc>https://example.com/docs/search.json</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>weekly</changefreq>
                <priority>0.5</priority>
            </url>
            <url>
                <loc>https://example.com/docs/search.html</loc>
                <lastmod>2024-01-01T12:00:00+00:00</lastmod>
                <changefreq>weekly</changefreq>
                <priority>0.5</priority>
            </url>
        </urlset>
        XML;
    }

    /**
     * Collapse a formatted XML string onto a single line by trimming every
     * line and joining the lines without a separator.
     */
    protected function stripFormatting(string $xml): string
    {
        return implode('', array_map('trim', explode("\n", $xml)));
    }

    /**
     * Put each tag on its own line by inserting a newline between adjacent
     * `><` pairs, so that a failed comparison produces a readable diff.
     */
    protected function expandLines(string $xml): string
    {
        return str_replace('><', ">\n<", $xml);
    }

    /**
     * Assert that two XML strings are identical after both have been expanded
     * to one tag per line.
     */
    protected function assertSameXml(string $expected, string $actual): void
    {
        $this->assertSame($this->expandLines($expected), $this->expandLines($actual));
    }
}

0 comments on commit aebf524

Please sign in to comment.