diff --git a/CHANGELOG.md b/CHANGELOG.md index b613d39..dd64438 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -* SitemapQuery Extensibility point through `Codein\IbexaSeoToolkit\Event\SitemapQueryEvent` +* Analyzers traits for word count and string normalization +* New `internal_links_hostnames` configuration parameter to improve internal links analysis +* `AnalysisDTO` content extensibility point through `Codein\IbexaSeoToolkit\Event\AnalysisDTOEvent` +* `SitemapQuery` extensibility point through `Codein\IbexaSeoToolkit\Event\SitemapQueryEvent` ### Changed +* Refactor and improve links analysis +* Allow analysis based either on + * full preview content (metas, title, etc.) + * "real" page content (page without head, header, footer, etc. ) + * richtext fields content +* Fixed richtext fields merging * Fixed versions requirements in the docs to be consistent. * Use `ezpublish.api.service.inner_schema_namer` factory instead of the internal schema_namer service (#8) * Update install documentation @@ -23,6 +32,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Use `ezplatform` entrypoint for assets import (#9) * [Admin UI] Fixed toolbar icon path +### Removed +* Removed `XmlProcessingService::combineAndProcessXmlFields($fields, $process = true)` second parameter `$process = true` + ## [1.0.0] - 2021-07-09 ### Added diff --git a/bundle/Analysis/Analyzers/AbstractLinksAnalyzer.php b/bundle/Analysis/Analyzers/AbstractLinksAnalyzer.php new file mode 100644 index 0000000..dfcec16 --- /dev/null +++ b/bundle/Analysis/Analyzers/AbstractLinksAnalyzer.php @@ -0,0 +1,81 @@ +analyzerService = $analyzerService; + $this->internalHostnames = $siteAccessConfigResolver->getParameterConfig('internal_links_hostnames'); + } + + public function analyze(AnalysisDTO $analysisDTO): array + { + $domDocument = 
$analysisDTO->getContentDOMDocument(); + $wordCount = $this->getWordCount($domDocument); + $domxPath = new DOMXPath($domDocument); + $count = $this->getLinksCount($domxPath->query('.//a')); + $ratio = ($wordCount > 0 ? $count / $wordCount : 0); + + $status = RatioLevels::LOW; + if ($ratio > 0 && $ratio < self::GOOD_RATIO) { + $status = RatioLevels::MEDIUM; + } elseif ($ratio >= self::GOOD_RATIO) { + $status = RatioLevels::HIGH; + } + + return [ + self::CATEGORY => [ + 'status' => $status, + 'data' => [ + 'count' => $count, + 'recommended' => \ceil($wordCount / (1 / self::GOOD_RATIO)), + ], + ], + ]; + } + + protected function hrefIsInternal(string $linkHref): bool + { + if (false !== \mb_strpos($linkHref, 'ezlocation://')) { + return true; + } + + $parsed = parse_url($linkHref); + $isInternal = false; + if (\is_array($parsed)) { + $isInternal = true; + if (isset($parsed['scheme'], $parsed['host']) + && !\in_array($parsed['host'], $this->internalHostnames, true) + ) { + $isInternal = false; + } + } + + return $isInternal; + } + + abstract protected function getLinksCount(DOMNodeList $allLinks): int; +} diff --git a/bundle/Analysis/Analyzers/InternalLinksAnalyzer.php b/bundle/Analysis/Analyzers/InternalLinksAnalyzer.php index 114e832..87d6596 100644 --- a/bundle/Analysis/Analyzers/InternalLinksAnalyzer.php +++ b/bundle/Analysis/Analyzers/InternalLinksAnalyzer.php @@ -2,88 +2,25 @@ namespace Codein\IbexaSeoToolkit\Analysis\Analyzers; -use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer; -use Codein\IbexaSeoToolkit\Analysis\RatioLevels; -use Codein\IbexaSeoToolkit\Model\AnalysisDTO; -use Codein\IbexaSeoToolkit\Service\AnalyzerService; -use Codein\IbexaSeoToolkit\Service\XmlProcessingService; +use DOMElement; +use DOMNodeList; /** * Class InternalLinksAnalyzer. 
*/ -final class InternalLinksAnalyzer extends AbstractAnalyzer +final class InternalLinksAnalyzer extends AbstractLinksAnalyzer { - private const CATEGORY = 'codein_seo_toolkit.analyzer.category.lisibility'; - - private const GOOD_RATIO = 1 / 100; - - /** @var XmlProcessingService */ - private $xmlProcessingService; - - /** @var AnalyzerService */ - private $analyzerService; - - public function __construct(AnalyzerService $analyzerService, XmlProcessingService $xmlProcessingService) + protected function getLinksCount(DOMNodeList $allLinks): int { - $this->xmlProcessingService = $xmlProcessingService; - $this->analyzerService = $analyzerService; - } - - public function analyze(AnalysisDTO $analysisDTO): array - { - $fields = $analysisDTO->getFields(); - - \libxml_use_internal_errors(true); - /** @var \DOMDocument $xml */ - try { - $html = $this->xmlProcessingService->combineAndProcessXmlFields($fields); - } catch (\Exception $e) { - return $this->analyzerService->compile(self::CATEGORY, null, null); - } - - $htmlText = \strip_tags($html->saveHTML()); - $wordCount = \str_word_count($htmlText); - - $domxPath = new \DOMXPath($html); - $allLinks = $domxPath->query('.//a'); - $count = 0; - - /** @var \DOMElement $link */ + /** @var DOMElement $link */ foreach ($allLinks as $link) { $linkHref = $link->getAttribute('href'); - // Drop internal links - if (false !== \mb_strpos($linkHref, 'ezlocation://')) { + if (false === \mb_strpos($linkHref, 'mailto:') && $this->hrefIsInternal($linkHref)) { ++$count; } } - $ratio = $count / $wordCount; - - $status = RatioLevels::LOW; - if ($ratio > 0 && $ratio < self::GOOD_RATIO) { - $status = RatioLevels::MEDIUM; - } elseif ($ratio >= self::GOOD_RATIO) { - $status = RatioLevels::HIGH; - } - - return [ - self::CATEGORY => [ - 'status' => $status, - 'data' => [ - 'count' => $count, - 'recommended' => \ceil($wordCount / (1 / self::GOOD_RATIO)), - ], - ], - ]; - } - - public function support(AnalysisDTO $analysisDTO): bool - { - if (0 === 
\count($analysisDTO->getFields())) { - return false; - } - - return true; + return $count; } } diff --git a/bundle/Analysis/Analyzers/KeywordInTitlesAnalyzer.php b/bundle/Analysis/Analyzers/KeywordInTitlesAnalyzer.php index 60d5182..23e2110 100644 --- a/bundle/Analysis/Analyzers/KeywordInTitlesAnalyzer.php +++ b/bundle/Analysis/Analyzers/KeywordInTitlesAnalyzer.php @@ -3,42 +3,36 @@ namespace Codein\IbexaSeoToolkit\Analysis\Analyzers; use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer; +use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\StringNormalizerTrait; use Codein\IbexaSeoToolkit\Analysis\RatioLevels; use Codein\IbexaSeoToolkit\Model\AnalysisDTO; use Codein\IbexaSeoToolkit\Service\AnalyzerService; -use Codein\IbexaSeoToolkit\Service\XmlProcessingService; /** * Class KeywordInTitlesAnalyzer. */ final class KeywordInTitlesAnalyzer extends AbstractAnalyzer { + use StringNormalizerTrait; + private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword'; + /** @var AnalyzerService */ private $analyzerService; - private $xmlProcessingService; public function __construct( - AnalyzerService $analyzerService, - XmlProcessingService $xmlProcessingService + AnalyzerService $analyzerService ) { $this->analyzerService = $analyzerService; - $this->xmlProcessingService = $xmlProcessingService; } public function analyze(AnalysisDTO $analysisDTO): array { - $fields = $analysisDTO->getFields(); - - \libxml_use_internal_errors(true); - /** @var \DOMDocument $xml */ - $html = $this->xmlProcessingService->combineAndProcessXmlFields($fields); - - $domxPath = new \DOMXPath($html); + $domxPath = new \DOMXPath($analysisDTO->getContentDOMDocument()); $titles = $domxPath->query('//*[self::h1 or self::h2 or self::h3 or self::h4 or self::h5 or self::h6]'); - $keywordSynonyms = \explode(',', \strtr(\mb_strtolower($analysisDTO->getKeyword()), AnalyzerService::ACCENT_VALUES)); + $keywordSynonyms = \explode(',', $this->normalizeString($analysisDTO->getKeyword())); 
$keywordSynonyms = \array_map('trim', $keywordSynonyms); $numberOfTitles = 0; @@ -46,7 +40,7 @@ public function analyze(AnalysisDTO $analysisDTO): array foreach ($titles as $title) { foreach ($keywordSynonyms as $keyword) { /** @var \DOMElement $title */ - $titleLowercase = \strtr(\mb_strtolower($title->textContent), AnalyzerService::ACCENT_VALUES); + $titleLowercase = $this->normalizeString($title->textContent); if (false !== \mb_strpos($titleLowercase, $keyword)) { ++$numberOfTitlesContainingKeyword; break; @@ -71,13 +65,4 @@ public function analyze(AnalysisDTO $analysisDTO): array 'ratio' => $ratioKeywordInTitle, ]); } - - public function support(AnalysisDTO $analysisDTO): bool - { - if (0 === \count($analysisDTO->getFields())) { - return false; - } - - return true; - } } diff --git a/bundle/Analysis/Analyzers/KeywordInUrlSlugAnalyzer.php b/bundle/Analysis/Analyzers/KeywordInUrlSlugAnalyzer.php index 9ff8044..1ea5e98 100644 --- a/bundle/Analysis/Analyzers/KeywordInUrlSlugAnalyzer.php +++ b/bundle/Analysis/Analyzers/KeywordInUrlSlugAnalyzer.php @@ -3,6 +3,7 @@ namespace Codein\IbexaSeoToolkit\Analysis\Analyzers; use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer; +use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\StringNormalizerTrait; use Codein\IbexaSeoToolkit\Analysis\RatioLevels; use Codein\IbexaSeoToolkit\Model\AnalysisDTO; use Codein\IbexaSeoToolkit\Service\AnalyzerService; @@ -16,10 +17,17 @@ */ final class KeywordInUrlSlugAnalyzer extends AbstractAnalyzer { + use StringNormalizerTrait; + private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword'; + /** @var AnalyzerService */ private $analyzerService; + + /** @var URLAliasService */ private $urlAliasService; + + /** @var LocationService */ private $locationService; public function __construct( @@ -45,7 +53,7 @@ public function analyze(AnalysisDTO $analysisDTO): array $pathArray = \explode('/', $urlAlias->path); $urlSlug = \mb_strtolower(\end($pathArray)); $urlSlugWithoutDashes = 
\str_replace('-', ' ', $urlSlug); - $keywordSynonyms = \explode(',', \strtr(\mb_strtolower($analysisDTO->getKeyword()), AnalyzerService::ACCENT_VALUES)); + $keywordSynonyms = \explode(',', $this->normalizeString($analysisDTO->getKeyword())); $keywordSynonyms = \array_map('trim', $keywordSynonyms); $bestRatio = 0; diff --git a/bundle/Analysis/Analyzers/KeywordLengthAnalyzer.php b/bundle/Analysis/Analyzers/KeywordLengthAnalyzer.php index 9357ae7..d3c30fd 100644 --- a/bundle/Analysis/Analyzers/KeywordLengthAnalyzer.php +++ b/bundle/Analysis/Analyzers/KeywordLengthAnalyzer.php @@ -3,29 +3,22 @@ namespace Codein\IbexaSeoToolkit\Analysis\Analyzers; use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer; +use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\StringNormalizerTrait; use Codein\IbexaSeoToolkit\Analysis\RatioLevels; use Codein\IbexaSeoToolkit\Model\AnalysisDTO; -use Codein\IbexaSeoToolkit\Service\AnalyzerService; /** * Class KeywordLengthAnalyzer. */ final class KeywordLengthAnalyzer extends AbstractAnalyzer { - private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword'; - - /** @var \Codein\IbexaSeoToolkit\Service\AnalyzerService */ - private $analyzerService; + use StringNormalizerTrait; - public function __construct( - AnalyzerService $analyzerService - ) { - $this->analyzerService = $analyzerService; - } + private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword'; public function analyze(AnalysisDTO $analysisDTO): array { - $keywordSynonyms = \explode(',', \strtr(\mb_strtolower($analysisDTO->getKeyword()), AnalyzerService::ACCENT_VALUES)); + $keywordSynonyms = \explode(',', $this->normalizeString($analysisDTO->getKeyword())); $keywordSynonyms = \array_map('trim', $keywordSynonyms); $maxCount = 0; diff --git a/bundle/Analysis/Analyzers/MetaDescriptionContainsKeywordAnalyzer.php b/bundle/Analysis/Analyzers/MetaDescriptionContainsKeywordAnalyzer.php index fa10ab7..0c761ed 100644 --- 
a/bundle/Analysis/Analyzers/MetaDescriptionContainsKeywordAnalyzer.php +++ b/bundle/Analysis/Analyzers/MetaDescriptionContainsKeywordAnalyzer.php @@ -3,6 +3,7 @@ namespace Codein\IbexaSeoToolkit\Analysis\Analyzers; use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer; +use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\StringNormalizerTrait; use Codein\IbexaSeoToolkit\Analysis\RatioLevels; use Codein\IbexaSeoToolkit\Model\AnalysisDTO; use Codein\IbexaSeoToolkit\Service\AnalyzerService; @@ -14,6 +15,8 @@ */ final class MetaDescriptionContainsKeywordAnalyzer extends AbstractAnalyzer { + use StringNormalizerTrait; + private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword'; /** @var \Codein\IbexaSeoToolkit\Service\AnalyzerService */ @@ -32,16 +35,13 @@ public function __construct( public function analyze(AnalysisDTO $analysisDTO): array { - $domDocument = new \DOMDocument(); - $domDocument->loadHTML($analysisDTO->getPreviewHtml()); - - $domxPath = new \DOMXPath($domDocument); + $domxPath = new \DOMXPath($analysisDTO->getHtmlPreviewDOMDocument()); /** @var \DOMNodeList $titleTags */ $metaDescriptionTags = $domxPath->query('//meta[@name="description"]'); try { - $keywordSynonyms = \explode(',', \strtr(\mb_strtolower($analysisDTO->getKeyword()), AnalyzerService::ACCENT_VALUES)); + $keywordSynonyms = \explode(',', $this->normalizeString($analysisDTO->getKeyword())); $keywordSynonyms = \array_map('trim', $keywordSynonyms); $status = RatioLevels::MEDIUM; @@ -51,7 +51,7 @@ public function analyze(AnalysisDTO $analysisDTO): array /** @var \DOMElement $metaDescriptionTag */ foreach ($metaDescriptionTags as $metaDescriptionTag) { foreach ($keywordSynonyms as $keyword) { - $contentMetaDescriptionTagAttribute = \strtr(\mb_strtolower($metaDescriptionTag->getAttribute('content')), AnalyzerService::ACCENT_VALUES); + $contentMetaDescriptionTagAttribute = $this->normalizeString($metaDescriptionTag->getAttribute('content')); if (false !== 
\mb_strpos($contentMetaDescriptionTagAttribute, $keyword)) { $status = RatioLevels::HIGH; break; diff --git a/bundle/Analysis/Analyzers/OneH1TagMaximumAnalyzer.php b/bundle/Analysis/Analyzers/OneH1TagMaximumAnalyzer.php index a51e728..0bc6e51 100644 --- a/bundle/Analysis/Analyzers/OneH1TagMaximumAnalyzer.php +++ b/bundle/Analysis/Analyzers/OneH1TagMaximumAnalyzer.php @@ -23,10 +23,7 @@ public function __construct(AnalyzerService $analyzerService) public function analyze(AnalysisDTO $analysisDTO): array { - $domDocument = new \DOMDocument(); - $domDocument->loadHTML($analysisDTO->getPreviewHtml()); - - $selector = new \DOMXPath($domDocument); + $selector = new \DOMXPath($analysisDTO->getHtmlPreviewDOMDocument()); $h1 = $selector->query('//h1'); $count = $h1->count(); $status = RatioLevels::LOW; diff --git a/bundle/Analysis/Analyzers/OutboundLinksAnalyzer.php b/bundle/Analysis/Analyzers/OutboundLinksAnalyzer.php index 7b69fe7..16394d7 100644 --- a/bundle/Analysis/Analyzers/OutboundLinksAnalyzer.php +++ b/bundle/Analysis/Analyzers/OutboundLinksAnalyzer.php @@ -2,79 +2,25 @@ namespace Codein\IbexaSeoToolkit\Analysis\Analyzers; -use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer; -use Codein\IbexaSeoToolkit\Analysis\RatioLevels; -use Codein\IbexaSeoToolkit\Model\AnalysisDTO; -use Codein\IbexaSeoToolkit\Service\XmlProcessingService; +use DOMElement; +use DOMNodeList; /** * Class OutboundLinksAnalyzer. 
*/ -final class OutboundLinksAnalyzer extends AbstractAnalyzer +final class OutboundLinksAnalyzer extends AbstractLinksAnalyzer { - private const CATEGORY = 'codein_seo_toolkit.analyzer.category.lisibility'; - - private const GOOD_RATIO = 1 / 400; - - /** @var XmlProcessingService */ - private $xmlProcessingService; - - public function __construct(XmlProcessingService $xmlProcessingService) - { - $this->xmlProcessingService = $xmlProcessingService; - } - - public function analyze(AnalysisDTO $analysisDTO): array + protected function getLinksCount(DOMNodeList $allLinks): int { - $fields = $analysisDTO->getFields(); - - \libxml_use_internal_errors(true); - /** @var \DOMDocument $xml */ - $html = $this->xmlProcessingService->combineAndProcessXmlFields($fields); - - $htmlText = \strip_tags($html->saveHTML()); - $wordCount = \str_word_count($htmlText); - - $domxPath = new \DOMXPath($html); - $allLinks = $domxPath->query('.//a'); - $count = 0; - - /** @var \DOMElement $link */ + /** @var DOMElement $link */ foreach ($allLinks as $link) { $linkHref = $link->getAttribute('href'); - // Drop internal links - if (false === \mb_strpos($linkHref, 'ezlocation://')) { + if (false === \mb_strpos($linkHref, 'mailto:') && !$this->hrefIsInternal($linkHref)) { ++$count; } } - $ratio = $count / $wordCount; - - $status = RatioLevels::LOW; - if ($ratio > 0 && $ratio < self::GOOD_RATIO) { - $status = RatioLevels::MEDIUM; - } elseif ($ratio >= self::GOOD_RATIO) { - $status = RatioLevels::HIGH; - } - - return [ - self::CATEGORY => [ - 'status' => $status, - 'data' => [ - 'count' => $count, - 'recommended' => \ceil($wordCount / (1 / self::GOOD_RATIO)), - ], - ], - ]; - } - - public function support(AnalysisDTO $analysisDTO): bool - { - if (0 === \count($analysisDTO->getFields())) { - return false; - } - - return true; + return $count; } } diff --git a/bundle/Analysis/Analyzers/SeoTitleWidthAnalyzer.php b/bundle/Analysis/Analyzers/SeoTitleWidthAnalyzer.php index 6a4546c..1921e3a 100644 --- 
a/bundle/Analysis/Analyzers/SeoTitleWidthAnalyzer.php +++ b/bundle/Analysis/Analyzers/SeoTitleWidthAnalyzer.php @@ -25,10 +25,7 @@ public function __construct( public function analyze(AnalysisDTO $analysisDTO): array { - $domDocument = new \DOMDocument(); - $domDocument->loadHTML($analysisDTO->getPreviewHtml()); - - $domxPath = new \DOMXPath($domDocument); + $domxPath = new \DOMXPath($analysisDTO->getHtmlPreviewDOMDocument()); /** @var \DOMNodeList $titleTags */ $titleTags = $domxPath->query('//title'); diff --git a/bundle/Analysis/Analyzers/TitleTagContainsKeywordAnalyzer.php b/bundle/Analysis/Analyzers/TitleTagContainsKeywordAnalyzer.php index c6c5e6d..2bd8a77 100644 --- a/bundle/Analysis/Analyzers/TitleTagContainsKeywordAnalyzer.php +++ b/bundle/Analysis/Analyzers/TitleTagContainsKeywordAnalyzer.php @@ -3,6 +3,7 @@ namespace Codein\IbexaSeoToolkit\Analysis\Analyzers; use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer; +use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\StringNormalizerTrait; use Codein\IbexaSeoToolkit\Analysis\RatioLevels; use Codein\IbexaSeoToolkit\Model\AnalysisDTO; use Codein\IbexaSeoToolkit\Service\AnalyzerService; @@ -14,6 +15,8 @@ */ final class TitleTagContainsKeywordAnalyzer extends AbstractAnalyzer { + use StringNormalizerTrait; + private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword'; /** @var \Codein\IbexaSeoToolkit\Service\AnalyzerService */ @@ -32,16 +35,13 @@ public function __construct( public function analyze(AnalysisDTO $analysisDTO): array { - $domDocument = new \DOMDocument(); - $domDocument->loadHTML($analysisDTO->getPreviewHtml()); - - $domxPath = new \DOMXPath($domDocument); + $domxPath = new \DOMXPath($analysisDTO->getHtmlPreviewDOMDocument()); /** @var \DOMNodeList $titleTags */ $titleTags = $domxPath->query('//title'); try { - $keywordSynonyms = \explode(',', \strtr(\mb_strtolower($analysisDTO->getKeyword()), AnalyzerService::ACCENT_VALUES)); + $keywordSynonyms = \explode(',', 
$this->normalizeString($analysisDTO->getKeyword())); $keywordSynonyms = \array_map('trim', $keywordSynonyms); $status = RatioLevels::MEDIUM; @@ -51,7 +51,7 @@ public function analyze(AnalysisDTO $analysisDTO): array /** @var \DOMElement $titleTag */ foreach ($titleTags as $titleTag) { foreach ($keywordSynonyms as $keyword) { - $contentTitleTagAttribute = \strtr(\mb_strtolower($titleTag->textContent), AnalyzerService::ACCENT_VALUES); + $contentTitleTagAttribute = $this->normalizeString($titleTag->textContent); if (false !== \mb_strpos($contentTitleTagAttribute, $keyword)) { $status = RatioLevels::HIGH; break; diff --git a/bundle/Analysis/Analyzers/Traits/StringNormalizerTrait.php b/bundle/Analysis/Analyzers/Traits/StringNormalizerTrait.php new file mode 100644 index 0000000..51ec0e6 --- /dev/null +++ b/bundle/Analysis/Analyzers/Traits/StringNormalizerTrait.php @@ -0,0 +1,13 @@ +saveHTML()); + $htmlText = html_entity_decode(preg_replace(['/\n/', '/\r/', '/\t/', '/\s+/'], [' ', ' ', ' ', ' '], $htmlText)); + + return str_word_count($htmlText); + } +} diff --git a/bundle/Analysis/Analyzers/WordCountAnalyzer.php b/bundle/Analysis/Analyzers/WordCountAnalyzer.php index 7bb2f26..d273981 100644 --- a/bundle/Analysis/Analyzers/WordCountAnalyzer.php +++ b/bundle/Analysis/Analyzers/WordCountAnalyzer.php @@ -3,39 +3,25 @@ namespace Codein\IbexaSeoToolkit\Analysis\Analyzers; use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer; +use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\WordCountTrait; use Codein\IbexaSeoToolkit\Analysis\RatioLevels; use Codein\IbexaSeoToolkit\Model\AnalysisDTO; -use Codein\IbexaSeoToolkit\Service\XmlProcessingService; /** * Class WordCountAnalyzer. 
*/ final class WordCountAnalyzer extends AbstractAnalyzer { + use WordCountTrait; + public const CATEGORY = 'codein_seo_toolkit.analyzer.category.lisibility'; public const INFIMUM = 700; public const SUPREMUM = 1500; - /** @var XmlProcessingService */ - private $xmlProcessingService; - - public function __construct(XmlProcessingService $xmlProcessingService) - { - $this->xmlProcessingService = $xmlProcessingService; - } - public function analyze(AnalysisDTO $analysisDTO): array { - $fields = $analysisDTO->getFields(); - - \libxml_use_internal_errors(true); - /** @var \DOMDocument $xml */ - $html = $this->xmlProcessingService->combineAndProcessXmlFields($fields)->saveHTML(); - - $text = \strip_tags($html); - - $count = \str_word_count($text); + $count = $this->getWordCount($analysisDTO->getContentDOMDocument()); $status = RatioLevels::LOW; // Pillar content increases the requirements @@ -58,13 +44,4 @@ public function analyze(AnalysisDTO $analysisDTO): array ], ]; } - - public function support(AnalysisDTO $analysisDTO): bool - { - if (0 === \count($analysisDTO->getFields())) { - return false; - } - - return true; - } } diff --git a/bundle/Controller/AnalyzeContentController.php b/bundle/Controller/AnalyzeContentController.php index eab852b..11916c9 100644 --- a/bundle/Controller/AnalyzeContentController.php +++ b/bundle/Controller/AnalyzeContentController.php @@ -3,6 +3,7 @@ namespace Codein\IbexaSeoToolkit\Controller; use Codein\IbexaSeoToolkit\Analysis\ParentAnalyzerService; +use Codein\IbexaSeoToolkit\Event\AnalysisDTOEvent; use Codein\IbexaSeoToolkit\Exception\AnalyzeException; use Codein\IbexaSeoToolkit\Exception\ValidationException; use Codein\IbexaSeoToolkit\Form\Type\AnalysisDTOType; @@ -10,6 +11,7 @@ use Codein\IbexaSeoToolkit\Service\AnalyzeContentService; use eZ\Publish\Core\MVC\Symfony\Controller\Content\PreviewController; use FOS\RestBundle\Controller\Annotations as Rest; +use Symfony\Component\EventDispatcher\EventDispatcherInterface; use 
Symfony\Component\Form\FormFactoryInterface; use Symfony\Component\HttpFoundation\Request; @@ -19,11 +21,21 @@ */ final class AnalyzeContentController { + /** @var AnalyzeContentService */ private $analyzeContentService; + + /** @var PreviewController */ private $previewControllerService; + + /** @var ParentAnalyzerService */ private $parentAnalyzerService; + + /** @var FormFactoryInterface */ private $formFactory; + /** @var EventDispatcherInterface */ + private $eventDispatcher; + /** * AnalyzeContentController constructor. */ @@ -31,12 +43,14 @@ public function __construct( PreviewController $previewControllerService, AnalyzeContentService $analyzeContentService, ParentAnalyzerService $parentAnalyzerService, - FormFactoryInterface $formFactory + FormFactoryInterface $formFactory, + EventDispatcherInterface $eventDispatcher ) { $this->analyzeContentService = $analyzeContentService; $this->previewControllerService = $previewControllerService; $this->parentAnalyzerService = $parentAnalyzerService; $this->formFactory = $formFactory; + $this->eventDispatcher = $eventDispatcher; } public function __invoke(Request $request) @@ -76,9 +90,17 @@ public function __invoke(Request $request) $analysisDTO->setIsPillarContent($contentConfiguration->getIsPillarContent()) ->setKeyword($contentConfiguration->getKeyword()) - ->setPreviewHtml($dataPreviewHtml); + ->setPreviewHtml($dataPreviewHtml) + ; + + try { + $event = new AnalysisDTOEvent($analysisDTO); + $this->eventDispatcher->dispatch($event); + } catch (\Exception $e) { + throw new AnalyzeException('codein_seo_toolkit.analyzer.error.dto_exception', $e); + } - $analyzeResult = $this->parentAnalyzerService->analyze($analysisDTO); + $analyzeResult = $this->parentAnalyzerService->analyze($event->getAnalysisDTO()); if (\array_key_exists('error', $analyzeResult)) { throw new AnalyzeException('codein_seo_toolkit.analyzer.error.content_not_configured'); diff --git a/bundle/DependencyInjection/Configuration.php 
b/bundle/DependencyInjection/Configuration.php index 685b128..c7f795f 100644 --- a/bundle/DependencyInjection/Configuration.php +++ b/bundle/DependencyInjection/Configuration.php @@ -33,6 +33,7 @@ public function getConfigTreeBuilder(): TreeBuilder $this->addMetasSection($nodeBuilder); $this->addLinksSection($nodeBuilder); $this->addHreflangSection($nodeBuilder); + $this->addInternalLinksHostnamesSection($nodeBuilder); $nodeBuilder->end(); return $treeBuilder; @@ -311,4 +312,17 @@ private function addHreflangSection(NodeBuilder $nodeBuilder): self return $this; } + + private function addInternalLinksHostnamesSection(NodeBuilder $nodeBuilder): self + { + $nodeBuilder + ->arrayNode('internal_links_hostnames') + ->info('Hostnames to consider as internal links during analysis.') + ->defaultValue([]) + ->scalarPrototype()->end() + ->end() + ; + + return $this; + } } diff --git a/bundle/Event/AnalysisDTOEvent.php b/bundle/Event/AnalysisDTOEvent.php new file mode 100644 index 0000000..94bf706 --- /dev/null +++ b/bundle/Event/AnalysisDTOEvent.php @@ -0,0 +1,29 @@ +analysisDTO = $analysisDTO; + } + + public function getAnalysisDTO(): AnalysisDTO + { + return $this->analysisDTO; + } + + public function setAnalysisDTO(AnalysisDTO $analysisDTO): self + { + $this->analysisDTO = $analysisDTO; + + return $this; + } +} diff --git a/bundle/EventSubscriber/AnalysisDTOEventSubscriber.php b/bundle/EventSubscriber/AnalysisDTOEventSubscriber.php new file mode 100644 index 0000000..fe338d9 --- /dev/null +++ b/bundle/EventSubscriber/AnalysisDTOEventSubscriber.php @@ -0,0 +1,50 @@ +processingService = $processingService; + } + + public static function getSubscribedEvents() + { + return [ + AnalysisDTOEvent::class => [ + ['setAnalyzableHtmlContent', -1000], + ], + ]; + } + + public function setAnalyzableHtmlContent(AnalysisDTOEvent $event) + { + libxml_use_internal_errors(true); + if (0 === \count($event->getAnalysisDTO()->getFields())) { + $xpath = new 
DOMXPath($event->getAnalysisDTO()->getContentDOMDocument()); + $body = $xpath->query('//*'); + + foreach ($body as $item) { + /** @var DOMElement $item */ + if (\in_array($item->tagName, ['head', 'header', 'footer', 'script', 'nav', 'aside', 'style', 'xml'], true) + && $item->parentNode instanceof DOMElement) { + $item->parentNode->removeChild($item); + } + } + } else { + $event->getAnalysisDTO()->setContentDOMDocument($this->processingService->combineAndProcessXmlFields( + $event->getAnalysisDTO()->getFields() + )); + } + } +} diff --git a/bundle/Model/AnalysisDTO.php b/bundle/Model/AnalysisDTO.php index 46aac2a..ee01217 100644 --- a/bundle/Model/AnalysisDTO.php +++ b/bundle/Model/AnalysisDTO.php @@ -2,6 +2,8 @@ namespace Codein\IbexaSeoToolkit\Model; +use DOMDocument; + /** * Class AnalysisDTO. */ @@ -16,6 +18,14 @@ class AnalysisDTO extends PreAnalysisDTO /** @var ?string */ private $previewHtml; + /** @var DOMDocument */ + private $contentDOMDocument; + + public function __construct() + { + $this->contentDOMDocument = new DOMDocument(); + } + /** * Get the value of keyword. */ @@ -57,7 +67,7 @@ public function setIsPillarContent($isPillarContent): self } /** - * Get the value of previewHtml. + * Get the value of the full HTML preview. * * @return ?string */ @@ -67,7 +77,8 @@ public function getPreviewHtml(): ?string } /** - * Set the value of previewHtml. + * Set the value of the full HTML preview. + * Keeps the analyzableDOMDocument in sync with the HTML preview. * * @param string $previewHtml */ @@ -75,6 +86,41 @@ public function setPreviewHtml(?string $previewHtml): self { $this->previewHtml = $previewHtml; + if (null !== $previewHtml) { + $this->contentDOMDocument->loadHTML($previewHtml); + } else { + $this->contentDOMDocument = new DOMDocument(); + } + + return $this; + } + + /** + * Get the full HTML preview as DOMDocument. 
+ */ + public function getHtmlPreviewDOMDocument(): DOMDocument + { + $DOMDocument = new DOMDocument(); + $DOMDocument->loadHTML($this->previewHtml); + + return $DOMDocument; + } + + /** + * Get the analyzable part of the HTML preview as DOMDocument. + */ + public function getContentDOMDocument(): DOMDocument + { + return $this->contentDOMDocument; + } + + /** + * Set the analyzable part of the HTML preview as DOMDocument. + */ + public function setContentDOMDocument(DOMDocument $contentDOMDocument): self + { + $this->contentDOMDocument = $contentDOMDocument; + return $this; } } diff --git a/bundle/Resources/translations/codein_seo_toolkit.en.yaml b/bundle/Resources/translations/codein_seo_toolkit.en.yaml index 024d4c0..72d1fb9 100644 --- a/bundle/Resources/translations/codein_seo_toolkit.en.yaml +++ b/bundle/Resources/translations/codein_seo_toolkit.en.yaml @@ -27,6 +27,7 @@ codein_seo_toolkit.analyzer.error.content_not_configured: "Please configure the codein_seo_toolkit.analyzer.error.analyzer_form_invalid: "Form is invalid. Please contact technical assitance." codein_seo_toolkit.analyzer.error.data_transfered: "Data needed for analysis missing." 
codein_seo_toolkit.analyzer.error.keywords_required: "Keyword Field is required" +codein_seo_toolkit.analyzer.error.dto_exception: "Collecting data for analysis generated an error" codein_seo_toolkit.analyzer.siteaccess_analyzed: "Siteaccess analyzed" codein_seo_toolkit.analyzer.category.keyword: 'Keyword' codein_seo_toolkit.analyzer.category.lisibility: 'Lisibility' diff --git a/bundle/Resources/translations/codein_seo_toolkit.fr.yaml b/bundle/Resources/translations/codein_seo_toolkit.fr.yaml index 484dd0b..965e878 100644 --- a/bundle/Resources/translations/codein_seo_toolkit.fr.yaml +++ b/bundle/Resources/translations/codein_seo_toolkit.fr.yaml @@ -27,6 +27,7 @@ codein_seo_toolkit.analyzer.error.content_not_configured: "Veuillez configurer l codein_seo_toolkit.analyzer.error.analyzer_form_invalid: "Le formulaire est invalide. Veuillez contacter votre prestataire pour une résolution." codein_seo_toolkit.analyzer.error.data_transfered: "Des données nécessaires pour l'analyse sont manquantes." codein_seo_toolkit.analyzer.error.keywords_required: "Le champ Mot-clé est requis" +codein_seo_toolkit.analyzer.error.dto_exception: "La collecte des données pour l'analyse a généré une erreur" codein_seo_toolkit.analyzer.siteaccess_analyzed: "Siteaccess analysé" codein_seo_toolkit.analyzer.category.keyword: 'Mot-clé' codein_seo_toolkit.analyzer.category.lisibility: 'Lisibilité' diff --git a/bundle/Service/XmlProcessingService.php b/bundle/Service/XmlProcessingService.php index 866a6d3..29b0247 100644 --- a/bundle/Service/XmlProcessingService.php +++ b/bundle/Service/XmlProcessingService.php @@ -3,39 +3,33 @@ namespace Codein\IbexaSeoToolkit\Service; use Codein\IbexaSeoToolkit\Model\Field; +use DOMDocument; /** * Class XmlProcessingService.
*/ final class XmlProcessingService { - public function combineAndProcessXmlFields($fields, $process = true) + public function combineAndProcessXmlFields($fields): DOMDocument { - $xml = ''; + $xmlDocument = new DOMDocument(); + /** @var Field $field */ foreach ($fields as $key => $field) { - $fieldXml = $field->getFieldValue(); - if (0 !== $key) { - $fieldXml = \preg_replace('/^<\?.*\?>(\n)?/', '', $fieldXml); + if (0 === $key) { + $xmlDocument->loadXML($field->getFieldValue()); + } else { + $fieldXMLDocument = new DOMDocument(); + $fieldXMLDocument->loadXML($field->getFieldValue()); + foreach ($fieldXMLDocument->firstChild->childNodes as $childNode) { + $domNode = $xmlDocument->importNode($childNode, true); + $xmlDocument->firstChild->appendChild($domNode); + } } - $xml .= $fieldXml; - } - - if ($process) { - $domDocument = new \DOMDocument(); - $domDocument->loadXML($xml); - - return $this->processDocument($domDocument); } - return $xml; - } - - private function processDocument(\DOMDocument $domDocument) - { - $xmlStr = $domDocument->saveHTML(); - $domDocument = new \DOMDocument('1.0', 'utf-8'); - $domDocument->loadHTML($xmlStr); + $domDocument = new DOMDocument('1.0', 'utf-8'); + $domDocument->loadHTML($xmlDocument->saveHTML()); return $domDocument; } diff --git a/docs/ANALYZERS.md b/docs/ANALYZERS.md index f6a2298..47ad9d5 100644 --- a/docs/ANALYZERS.md +++ b/docs/ANALYZERS.md @@ -1,7 +1,7 @@ Analyzers ====== -The bundle uses a set of analyzers to analyze Rich Text Contents and Content Preview. +The bundle uses a set of analyzers to analyze the page content An analyzer extends the `Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer` abstract class. @@ -19,22 +19,35 @@ For example, if we want to rank for "Ibexa Bundle", it make sense to use this ke Moreover, another goal is to help content creators to write content which will be read well by both search engines and the customer (a.k.a. *Lisibility analysis*). 
+### Analyzed content scope + +The content can be analyzed through 3 scopes + +1. `Full source code` : full content preview source code (Ex: analysis of `<head>` metadata and title) +2. `Filtered source code` : filtered version of the preview source code (Ex: count words, count links) + * By default the following tags are removed from the full content preview source code : 'head', 'header', 'footer', + 'script', 'nav', 'aside', 'style', 'xml' + * You can build your own filtered content by subscribing to the `Codein\IbexaSeoToolkit\Event\AnalysisDTOEvent` event +3. `Rich text data` : rich text fields content if they are configured for the current content type + * If no rich text fields are configured for the current content type, `Filtered source code` will be used instead. + ### Data analytics Analyzers uses data configured: -* Rich text field (at writing time) -* Content preview (requires at least to save the content to get up to date insight) +* Content preview in full or filtered mode (requires at least to save the content to get up to date insight) +* Rich text field (at writing time) if they are configured for the current content type * Keyword text field (contributed in the content edit view) * Is this a pillar content or not (contributed in the content edit view) ## Analyzers available At the moment there are several available analyzers: + ### Word Count Analyzer * Class: `Codein\IbexaSeoToolkit\Analysis\Analyzers\WordCountAnalyzer` -* Data context: `Rich text data` -* Role : It will count the number of words in all **Rich Text** fields configured. +* Data context: `Filtered source code` or `Rich text data` +* Role : It will count the number of words in the configured scope.
* Scores : * _Low_ : if text content < 700 words * _Medium_ : if 700 <= text content < 1500 words @@ -44,7 +57,7 @@ At the moment there are several available analyzers: ### Keyword In Titles Analyzer * Class: `Codein\IbexaSeoToolkit\Analysis\Analyzers\KeywordInTitlesAnalyzer` -* Data context: `Rich text data` +* Data context: `Filtered source code` or `Rich text data` * Role : It will check if the keyword is present in (h1|h2|h3|h4|h5|h6) titles. * Scores : * _Low_ : if keyword is present in less than 10% of titles @@ -64,17 +77,53 @@ At the moment there are several available analyzers: ### Title Tag Contains Keyword Analyzer * Class: `Codein\IbexaSeoToolkit\Analysis\Analyzers\TitleTagContainsKeywordAnalyzer` -* Data context: `Content Preview` +* Data context: `Full source code` * Role : It will check if the keyword is present in the title tag (= `<title>`). * Scores : * _Low_ : if no title tag is in the DOM * _Medium_ : if the title text does not contain the keyword * _High_ : if the title text contains the keyword +### Meta description contains Keyword Analyzer + +* Class: `Codein\IbexaSeoToolkit\Analysis\Analyzers\MetaDescriptionContainsKeywordAnalyzer` +* Data context: `Full source code` +* Role : It will check if the keyword is present in the meta description tag.
+* Scores : + * _Low_ : if no meta description tag is in the DOM + * _Medium_ : if the meta description text does not contain the keyword + * _High_ : if the meta description text contains the keyword + +### Internal links analyzer + +* Class: `Codein\IbexaSeoToolkit\Analysis\Analyzers\InternalLinksAnalyzer` +* Data context: `Filtered source code` +* Role : It will count the number of internal links in the significant part of the page +* Scores : + * _Low_ : if no internal links + * _Medium_ : if the number of internal links is less than 1% of the word count + * _High_ : if the number of internal links is at least 1% of the word count + +Use the `internal_links_hostnames` configuration parameter to set which hostnames are treated as internal +when a link uses an absolute href + +### External links analyzer + +* Class: `Codein\IbexaSeoToolkit\Analysis\Analyzers\OutboundLinksAnalyzer` +* Data context: `Filtered source code` +* Role : It will count the number of external links in the significant part of the page +* Scores : + * _Low_ : if no external links + * _Medium_ : if the number of external links is less than 1% of the word count + * _High_ : if the number of external links is at least 1% of the word count + +Use the `internal_links_hostnames` configuration parameter to set which hostnames are treated as internal +when a link uses an absolute href + ### One H1 Tag Maximum Analyzer * Class: `Codein\IbexaSeoToolkit\Analysis\Analyzers\OneH1TagMaximumAnalyzer` -* Data context: `Content Preview` +* Data context: `Full source code` * Role : It will check if the article has exactly one `h1 tag`. * Scores : * _Low_ : No h1 tag or more than one. @@ -90,6 +139,16 @@ By extending AbstractAnalyzer, your service should automatically be tagged : `co If that's not the case, know that it's required for your analysis class to be taken into account.
+### Create your own filtered source code scope + +By default, some analyzers use a filtered version of the page source code that excludes all `head`, `header`, `footer`, +`script`, `nav`, `aside`, `style`, `xml` tags. + +To improve the analysis quality, you can filter the source code your own way (Ex: keep only the `main` tag). + +Subscribe to the `Codein\IbexaSeoToolkit\Event\AnalysisDTOEvent` event and look at the `Codein\IbexaSeoToolkit\EventSubscriber\AnalysisDTOEventSubscriber` +for a working example. + ## That's it! Check out the docs for information on how to use the bundle! [Return to the diff --git a/docs/usage/ANALYZERS.md b/docs/usage/ANALYZERS.md index 68581ae..9039470 100644 --- a/docs/usage/ANALYZERS.md +++ b/docs/usage/ANALYZERS.md @@ -18,6 +18,8 @@ To enable analyzers on a content type, update the configuration of the bundle: codein_ibexa_seo_toolkit: system: default: # siteaccess + internal_links_hostnames: # To improve links analysis, list the hostnames whose absolute links count as internal + - myhostname.com analysis: content_types: product: