Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/improve page content retrievement for analysis #13

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Added
* SitemapQuery Extensibility point through `Codein\IbexaSeoToolkit\Event\SitemapQueryEvent`
* Analyzers traits for word count and string normalization
* New `internal_links_hostnames` configuration parameter to improve internal links analysis
* `AnalysisDTO` content extensibility point through `Codein\IbexaSeoToolkit\Event\AnalysisDTOEvent`
* `SitemapQuery` extensibility point through `Codein\IbexaSeoToolkit\Event\SitemapQueryEvent`

### Changed
* Refactor and improve links analysis
* Allow analysis based on either:
* full preview content (metas, title, etc.)
* "real" page content (page without head, header, footer, etc.)
* richtext fields content
* Fixed richtext fields merging
* Fixed version requirements in the docs to be consistent.
* Use `ezpublish.api.service.inner_schema_namer` factory instead of the internal schema_namer service (#8)
* Update install documentation
Expand All @@ -23,6 +32,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Use `ezplatform` entrypoint for assets import (#9)
* [Admin UI] Fixed toolbar icon path

### Removed
* Second parameter `$process = true` of `XmlProcessingService::combineAndProcessXmlFields($fields, $process = true)`

## [1.0.0] - 2021-07-09
### Added

Expand Down
81 changes: 81 additions & 0 deletions bundle/Analysis/Analyzers/AbstractLinksAnalyzer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
<?php declare(strict_types=1);

namespace Codein\IbexaSeoToolkit\Analysis\Analyzers;

use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer;
use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\WordCountTrait;
use Codein\IbexaSeoToolkit\Analysis\RatioLevels;
use Codein\IbexaSeoToolkit\Helper\SiteAccessConfigResolver;
use Codein\IbexaSeoToolkit\Model\AnalysisDTO;
use Codein\IbexaSeoToolkit\Service\AnalyzerService;
use DOMNodeList;
use DOMXPath;

/**
 * Base class for analyzers that rate a page by its links-to-words ratio.
 *
 * Subclasses decide which links are counted by implementing getLinksCount();
 * this class computes the ratio against the word count of the content
 * document and maps it to a RatioLevels status.
 */
abstract class AbstractLinksAnalyzer extends AbstractAnalyzer
{
    use WordCountTrait;

    protected const CATEGORY = 'codein_seo_toolkit.analyzer.category.lisibility';

    /** Links-per-word ratio at or above which the status is RatioLevels::HIGH (1 link per 100 words). */
    protected const GOOD_RATIO = 1 / 100;

    /** @var AnalyzerService */
    protected $analyzerService;

    /** @var string[] Hostnames read from the `internal_links_hostnames` parameter. */
    protected $internalHostnames;

    /**
     * @param AnalyzerService          $analyzerService          Stored for use by subclasses.
     * @param SiteAccessConfigResolver $siteAccessConfigResolver Source of the `internal_links_hostnames` parameter.
     */
    public function __construct(AnalyzerService $analyzerService, SiteAccessConfigResolver $siteAccessConfigResolver)
    {
        $this->analyzerService = $analyzerService;
        $this->internalHostnames = $siteAccessConfigResolver->getParameterConfig('internal_links_hostnames');
    }

    /**
     * Computes the links/words ratio of the DTO's content document.
     *
     * The status is RatioLevels::LOW when no link is counted (or the document
     * has no words), RatioLevels::MEDIUM when the ratio is strictly between 0
     * and GOOD_RATIO, and RatioLevels::HIGH from GOOD_RATIO upwards.
     *
     * @return array Result keyed by self::CATEGORY, holding `status` and a
     *               `data` array with the `count` of links and the
     *               `recommended` number of links for the word count.
     */
    public function analyze(AnalysisDTO $analysisDTO): array
    {
        $domDocument = $analysisDTO->getContentDOMDocument();
        // getWordCount() is presumably provided by WordCountTrait — confirm.
        $wordCount = $this->getWordCount($domDocument);
        $domxPath = new DOMXPath($domDocument);
        $count = $this->getLinksCount($domxPath->query('.//a'));
        // Guard against division by zero on documents without any word.
        $ratio = ($wordCount > 0 ? $count / $wordCount : 0);

        $status = RatioLevels::LOW;
        if ($ratio > 0 && $ratio < self::GOOD_RATIO) {
            $status = RatioLevels::MEDIUM;
        } elseif ($ratio >= self::GOOD_RATIO) {
            $status = RatioLevels::HIGH;
        }

        return [
            self::CATEGORY => [
                'status' => $status,
                'data' => [
                    'count' => $count,
                    'recommended' => \ceil($wordCount / (1 / self::GOOD_RATIO)),
                ],
            ],
        ];
    }

    /**
     * Tells whether a link target belongs to the current site.
     *
     * A link is internal when it contains `ezlocation://`, when it carries no
     * host at all (relative path, fragment, scheme-only URI such as `tel:`),
     * or when its host is one of the configured internal hostnames.
     *
     * The host is checked whenever it is present, including protocol-relative
     * URLs (`//host/path`), which parse_url() reports with a `host` but no
     * `scheme`. Hostnames are compared case-insensitively.
     *
     * @param string $linkHref Raw value of the `href` attribute.
     */
    protected function hrefIsInternal(string $linkHref): bool
    {
        if (false !== \mb_strpos($linkHref, 'ezlocation://')) {
            return true;
        }

        $parsed = \parse_url($linkHref);
        if (!\is_array($parsed)) {
            // Seriously malformed URL: cannot be classified as internal.
            return false;
        }

        if (!isset($parsed['host'])) {
            return true;
        }

        $host = \mb_strtolower($parsed['host']);
        foreach ($this->internalHostnames as $internalHostname) {
            if ($host === \mb_strtolower((string) $internalHostname)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Counts, among all `a` elements of the document, those relevant to the
     * concrete analyzer.
     *
     * @param DOMNodeList $allLinks Result of the `.//a` XPath query.
     */
    abstract protected function getLinksCount(DOMNodeList $allLinks): int;
}
77 changes: 7 additions & 70 deletions bundle/Analysis/Analyzers/InternalLinksAnalyzer.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,88 +2,25 @@

namespace Codein\IbexaSeoToolkit\Analysis\Analyzers;

use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer;
use Codein\IbexaSeoToolkit\Analysis\RatioLevels;
use Codein\IbexaSeoToolkit\Model\AnalysisDTO;
use Codein\IbexaSeoToolkit\Service\AnalyzerService;
use Codein\IbexaSeoToolkit\Service\XmlProcessingService;
use DOMElement;
use DOMNodeList;

/**
* Class InternalLinksAnalyzer.
*/
final class InternalLinksAnalyzer extends AbstractAnalyzer
final class InternalLinksAnalyzer extends AbstractLinksAnalyzer
{
private const CATEGORY = 'codein_seo_toolkit.analyzer.category.lisibility';

private const GOOD_RATIO = 1 / 100;

/** @var XmlProcessingService */
private $xmlProcessingService;

/** @var AnalyzerService */
private $analyzerService;

public function __construct(AnalyzerService $analyzerService, XmlProcessingService $xmlProcessingService)
protected function getLinksCount(DOMNodeList $allLinks): int
{
$this->xmlProcessingService = $xmlProcessingService;
$this->analyzerService = $analyzerService;
}

public function analyze(AnalysisDTO $analysisDTO): array
{
$fields = $analysisDTO->getFields();

\libxml_use_internal_errors(true);
/** @var \DOMDocument $xml */
try {
$html = $this->xmlProcessingService->combineAndProcessXmlFields($fields);
} catch (\Exception $e) {
return $this->analyzerService->compile(self::CATEGORY, null, null);
}

$htmlText = \strip_tags($html->saveHTML());
$wordCount = \str_word_count($htmlText);

$domxPath = new \DOMXPath($html);
$allLinks = $domxPath->query('.//a');

$count = 0;

/** @var \DOMElement $link */
/** @var DOMElement $link */
foreach ($allLinks as $link) {
$linkHref = $link->getAttribute('href');
// Drop internal links
if (false !== \mb_strpos($linkHref, 'ezlocation://')) {
if (false === \mb_strpos($linkHref, 'mailto:') && $this->hrefIsInternal($linkHref)) {
++$count;
}
}

$ratio = $count / $wordCount;

$status = RatioLevels::LOW;
if ($ratio > 0 && $ratio < self::GOOD_RATIO) {
$status = RatioLevels::MEDIUM;
} elseif ($ratio >= self::GOOD_RATIO) {
$status = RatioLevels::HIGH;
}

return [
self::CATEGORY => [
'status' => $status,
'data' => [
'count' => $count,
'recommended' => \ceil($wordCount / (1 / self::GOOD_RATIO)),
],
],
];
}

public function support(AnalysisDTO $analysisDTO): bool
{
if (0 === \count($analysisDTO->getFields())) {
return false;
}

return true;
return $count;
}
}
31 changes: 8 additions & 23 deletions bundle/Analysis/Analyzers/KeywordInTitlesAnalyzer.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,44 @@
namespace Codein\IbexaSeoToolkit\Analysis\Analyzers;

use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer;
use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\StringNormalizerTrait;
use Codein\IbexaSeoToolkit\Analysis\RatioLevels;
use Codein\IbexaSeoToolkit\Model\AnalysisDTO;
use Codein\IbexaSeoToolkit\Service\AnalyzerService;
use Codein\IbexaSeoToolkit\Service\XmlProcessingService;

/**
* Class KeywordInTitlesAnalyzer.
*/
final class KeywordInTitlesAnalyzer extends AbstractAnalyzer
{
use StringNormalizerTrait;

private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword';

/** @var AnalyzerService */
private $analyzerService;
private $xmlProcessingService;

public function __construct(
AnalyzerService $analyzerService,
XmlProcessingService $xmlProcessingService
AnalyzerService $analyzerService
) {
$this->analyzerService = $analyzerService;
$this->xmlProcessingService = $xmlProcessingService;
}

public function analyze(AnalysisDTO $analysisDTO): array
{
$fields = $analysisDTO->getFields();

\libxml_use_internal_errors(true);
/** @var \DOMDocument $xml */
$html = $this->xmlProcessingService->combineAndProcessXmlFields($fields);

$domxPath = new \DOMXPath($html);
$domxPath = new \DOMXPath($analysisDTO->getContentDOMDocument());

$titles = $domxPath->query('//*[self::h1 or self::h2 or self::h3 or self::h4 or self::h5 or self::h6]');

$keywordSynonyms = \explode(',', \strtr(\mb_strtolower($analysisDTO->getKeyword()), AnalyzerService::ACCENT_VALUES));
$keywordSynonyms = \explode(',', $this->normalizeString($analysisDTO->getKeyword()));
$keywordSynonyms = \array_map('trim', $keywordSynonyms);

$numberOfTitles = 0;
$numberOfTitlesContainingKeyword = 0;
foreach ($titles as $title) {
foreach ($keywordSynonyms as $keyword) {
/** @var \DOMElement $title */
$titleLowercase = \strtr(\mb_strtolower($title->textContent), AnalyzerService::ACCENT_VALUES);
$titleLowercase = $this->normalizeString($title->textContent);
if (false !== \mb_strpos($titleLowercase, $keyword)) {
++$numberOfTitlesContainingKeyword;
break;
Expand All @@ -71,13 +65,4 @@ public function analyze(AnalysisDTO $analysisDTO): array
'ratio' => $ratioKeywordInTitle,
]);
}

public function support(AnalysisDTO $analysisDTO): bool
{
if (0 === \count($analysisDTO->getFields())) {
return false;
}

return true;
}
}
10 changes: 9 additions & 1 deletion bundle/Analysis/Analyzers/KeywordInUrlSlugAnalyzer.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Codein\IbexaSeoToolkit\Analysis\Analyzers;

use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer;
use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\StringNormalizerTrait;
use Codein\IbexaSeoToolkit\Analysis\RatioLevels;
use Codein\IbexaSeoToolkit\Model\AnalysisDTO;
use Codein\IbexaSeoToolkit\Service\AnalyzerService;
Expand All @@ -16,10 +17,17 @@
*/
final class KeywordInUrlSlugAnalyzer extends AbstractAnalyzer
{
use StringNormalizerTrait;

private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword';

/** @var AnalyzerService */
private $analyzerService;

/** @var URLAliasService */
private $urlAliasService;

/** @var LocationService */
private $locationService;

public function __construct(
Expand All @@ -45,7 +53,7 @@ public function analyze(AnalysisDTO $analysisDTO): array
$pathArray = \explode('/', $urlAlias->path);
$urlSlug = \mb_strtolower(\end($pathArray));
$urlSlugWithoutDashes = \str_replace('-', ' ', $urlSlug);
$keywordSynonyms = \explode(',', \strtr(\mb_strtolower($analysisDTO->getKeyword()), AnalyzerService::ACCENT_VALUES));
$keywordSynonyms = \explode(',', $this->normalizeString($analysisDTO->getKeyword()));
$keywordSynonyms = \array_map('trim', $keywordSynonyms);

$bestRatio = 0;
Expand Down
15 changes: 4 additions & 11 deletions bundle/Analysis/Analyzers/KeywordLengthAnalyzer.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,22 @@
namespace Codein\IbexaSeoToolkit\Analysis\Analyzers;

use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer;
use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\StringNormalizerTrait;
use Codein\IbexaSeoToolkit\Analysis\RatioLevels;
use Codein\IbexaSeoToolkit\Model\AnalysisDTO;
use Codein\IbexaSeoToolkit\Service\AnalyzerService;

/**
* Class KeywordLengthAnalyzer.
*/
final class KeywordLengthAnalyzer extends AbstractAnalyzer
{
private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword';

/** @var \Codein\IbexaSeoToolkit\Service\AnalyzerService */
private $analyzerService;
use StringNormalizerTrait;

public function __construct(
AnalyzerService $analyzerService
) {
$this->analyzerService = $analyzerService;
}
private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword';

public function analyze(AnalysisDTO $analysisDTO): array
{
$keywordSynonyms = \explode(',', \strtr(\mb_strtolower($analysisDTO->getKeyword()), AnalyzerService::ACCENT_VALUES));
$keywordSynonyms = \explode(',', $this->normalizeString($analysisDTO->getKeyword()));
$keywordSynonyms = \array_map('trim', $keywordSynonyms);
$maxCount = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Codein\IbexaSeoToolkit\Analysis\Analyzers;

use Codein\IbexaSeoToolkit\Analysis\AbstractAnalyzer;
use Codein\IbexaSeoToolkit\Analysis\Analyzers\Traits\StringNormalizerTrait;
use Codein\IbexaSeoToolkit\Analysis\RatioLevels;
use Codein\IbexaSeoToolkit\Model\AnalysisDTO;
use Codein\IbexaSeoToolkit\Service\AnalyzerService;
Expand All @@ -14,6 +15,8 @@
*/
final class MetaDescriptionContainsKeywordAnalyzer extends AbstractAnalyzer
{
use StringNormalizerTrait;

private const CATEGORY = 'codein_seo_toolkit.analyzer.category.keyword';

/** @var \Codein\IbexaSeoToolkit\Service\AnalyzerService */
Expand All @@ -32,16 +35,13 @@ public function __construct(

public function analyze(AnalysisDTO $analysisDTO): array
{
$domDocument = new \DOMDocument();
$domDocument->loadHTML($analysisDTO->getPreviewHtml());

$domxPath = new \DOMXPath($domDocument);
$domxPath = new \DOMXPath($analysisDTO->getHtmlPreviewDOMDocument());

/** @var \DOMNodeList $titleTags */
$metaDescriptionTags = $domxPath->query('//meta[@name="description"]');

try {
$keywordSynonyms = \explode(',', \strtr(\mb_strtolower($analysisDTO->getKeyword()), AnalyzerService::ACCENT_VALUES));
$keywordSynonyms = \explode(',', $this->normalizeString($analysisDTO->getKeyword()));

$keywordSynonyms = \array_map('trim', $keywordSynonyms);
$status = RatioLevels::MEDIUM;
Expand All @@ -51,7 +51,7 @@ public function analyze(AnalysisDTO $analysisDTO): array
/** @var \DOMElement $metaDescriptionTag */
foreach ($metaDescriptionTags as $metaDescriptionTag) {
foreach ($keywordSynonyms as $keyword) {
$contentMetaDescriptionTagAttribute = \strtr(\mb_strtolower($metaDescriptionTag->getAttribute('content')), AnalyzerService::ACCENT_VALUES);
$contentMetaDescriptionTagAttribute = $this->normalizeString($metaDescriptionTag->getAttribute('content'));
if (false !== \mb_strpos($contentMetaDescriptionTagAttribute, $keyword)) {
$status = RatioLevels::HIGH;
break;
Expand Down
Loading