Skip to content

Commit

Permalink
Merge pull request #588 from TomCan/url-transformer
Browse files Browse the repository at this point in the history
Add UrlTransformerService and integrate in LinkifyExtension
  • Loading branch information
tvlooy authored Nov 13, 2024
2 parents cf62bd4 + ca7c2b4 commit 3843db3
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 15 deletions.
153 changes: 153 additions & 0 deletions src/Service/UrlTransformerService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
<?php

namespace App\Service;

class UrlTransformerService
{
    /**
     * Known host formats.
     *
     * Key is a regex matched against the host portion of a URL (as returned by parse_url).
     * Value is "<type>[,parameters...]" where <type> denotes the affiliate program that
     * selects the transformation logic and the parameters are program specific.
     */
    private array $hostFormats = [
        '/^(www\.)?amazon\.(com|co\.(jp|uk|za)|com\.(au|be|br|mx|tr)|ae|ca|cn|de|eg|es|fr|ie|in|it|nl|pl|sa|se|sg)$/' => 'amazon',
        '/^(www\.)?bol\.com$/' => 'bol',
        '/^(www\.)?coolblue\.be$/' => 'awin,85165',
    ];

    /**
     * Partner ids per affiliate program, keyed by the lowercased program name.
     *
     * @var array<string, list<string>>
     */
    private array $partnerIds = [];

    /**
     * Collects the partner ids from all PARTNER_<PROGRAM> entries in $_ENV.
     *
     * Multiple ids per program are supported by separating them with spaces.
     */
    public function __construct()
    {
        foreach ($_ENV as $key => $value) {
            $key = (string) $key;
            if (0 !== strpos($key, 'PARTNER_') || !is_scalar($value)) {
                continue;
            }

            // array_filter() preserves keys; reindex so the ids always form a list
            // starting at 0, even when the value has leading or repeated spaces.
            $this->partnerIds[strtolower(substr($key, 8))] = array_values(
                array_filter(explode(' ', (string) $value))
            );
        }
    }

    /**
     * Extract all distinct http(s) URLs from a text.
     *
     * @return list<string> the unique URLs in order of first appearance
     */
    public function extractUrls(string $text): array
    {
        $pattern = '#\bhttps?://[^,\s()<>]+(?:\([\w\d]+\)|([^,[:punct:]\s]|/))#';
        if (!preg_match_all($pattern, $text, $matches)) {
            return [];
        }

        // array_unique() keeps the original keys; reindex so callers get a plain list.
        return array_values(array_unique($matches[0]));
    }

    /**
     * Replace all urls in input array (as key) with its replacements (values).
     *
     * str_replace is not suitable because it could replace occurrences inside previous
     * replacements, or urls that are child/parent urls of other urls that need to be
     * replaced. strtr() with an array tries the longest key first at every position and
     * never rescans text it has already replaced, which is exactly what is needed here.
     *
     * @param array<string, string> $urls original url => replacement
     */
    public function replaceUrls(string $text, array $urls): string
    {
        // An empty search string can never be a meaningful url; drop it explicitly.
        unset($urls['']);

        if ('' === $text || [] === $urls) {
            return $text;
        }

        return strtr($text, $urls);
    }

    /**
     * Transform a URL into its affiliate variant when its host belongs to a known
     * affiliate program for which a partner id is configured.
     *
     * URLs that cannot be parsed, have no host, match no known host format or have no
     * configured partner id are returned unchanged.
     */
    public function transformUrl(string $url): string
    {
        // parse_url() returns false for seriously malformed URLs and omits absent parts.
        $urlParts = parse_url($url);
        if (!is_array($urlParts) || !isset($urlParts['host'])) {
            return $url;
        }

        // find matching host pattern; host names are case-insensitive
        $host = strtolower($urlParts['host']);
        $matchedFormat = '';
        foreach ($this->hostFormats as $hostFormat => $format) {
            if (1 === preg_match($hostFormat, $host)) {
                $matchedFormat = $format;
                break;
            }
        }

        if ('' === $matchedFormat) {
            return $url;
        }

        // split by comma. Shift first element off and use as key to identify type of link.
        $params = explode(',', $matchedFormat);
        $program = array_shift($params);

        // have we configured a partner id for this program?
        $ids = $this->partnerIds[$program] ?? [];
        if ([] === $ids) {
            return $url;
        }

        // select a random id when several are configured
        $partnerId = urlencode($ids[array_rand($ids)]);

        switch ($program) {
            case 'amazon':
                // append id as tag parameter, keeping a #fragment (if any) at the very end
                $fragment = '';
                $hashPosition = strpos($url, '#');
                if (false !== $hashPosition) {
                    $fragment = substr($url, $hashPosition);
                    $url = substr($url, 0, $hashPosition);
                }
                $separator = false === strpos($url, '?') ? '?' : '&';
                $url .= $separator.'tag='.$partnerId.$fragment;
                break;

            case 'bol':
                // generate text link to partner program and append original URL encoded
                $url = 'https://partner.bol.com/click/click?p=1&t=url&s='.$partnerId.'&f=TXL&url='.urlencode($url);
                break;

            case 'tradetracker':
                // params[0] should contain campaignid; without it no valid link can be built
                if (!isset($params[0])) {
                    break;
                }
                // append path (and query, if present) of the original URL encoded
                $target = $urlParts['path'] ?? '';
                if (isset($urlParts['query'])) {
                    $target .= '?'.$urlParts['query'];
                }
                $url = 'https://tc.tradetracker.net/?c='.$params[0].'&m=12&a='.$partnerId.'&r=&u='.urlencode($target);
                break;

            case 'awin':
                // params[0] should contain merchantid; without it no valid link can be built
                if (!isset($params[0])) {
                    break;
                }
                // append original URL encoded
                $url = 'https://www.awin1.com/cread.php?awinmid='.$params[0].'&awinaffid='.$partnerId.'&ued='.urlencode($url);
                break;

            default:
                // No matching format
        }

        return $url;
    }
}
26 changes: 13 additions & 13 deletions src/Twig/LinkifyExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@

namespace App\Twig;

use App\Service\UrlTransformerService;
use Twig\Extension\AbstractExtension;
use Twig\TwigFilter;

class LinkifyExtension extends AbstractExtension
{
/**
 * Receives the URL transformer service via constructor property promotion.
 */
public function __construct(
    private UrlTransformerService $urlTransformerService,
) {
}

public function getFilters(): array
{
return [
Expand All @@ -21,19 +26,14 @@ public function linkifyFilter($html)
return $html;
}

// Selects all urls starting with ://
$html = preg_replace(
'~[[:alpha:]]+://[^<>[:space:]]+[[:alnum:]/=!\?,]~',
'<a href="\\0" target="_blank" rel="noopener noreferrer">\\0</a>',
$html
);

// Selects all urls starting with www. but do not start with ://
$html = preg_replace(
'~(?<!://)www.[^<>[:space:]]+[[:alnum:]/=!\?,]~',
'<a href="http://\\0" target="_blank" rel="noopener noreferrer">\\0</a>',
$html
);
// extract, transform, create links and replace
$urls = $this->urlTransformerService->extractUrls($html);
$replacements = [];
foreach ($urls as $url) {
$replacement = $this->urlTransformerService->transformUrl($url);
$replacements[$url] = '<a href="'.$replacement.'" target="_blank" rel="noopener noreferrer">'.$url.'</a>';
}
$html = $this->urlTransformerService->replaceUrls($html, $replacements);

return $html;
}
Expand Down
49 changes: 49 additions & 0 deletions tests/php/unit/Service/UrlTransformerTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php

namespace App\Tests\Unit\Service;

use App\Service\UrlTransformerService;
use PHPUnit\Framework\TestCase;

class UrlTransformerTest extends TestCase
{
    /**
     * A text consisting of a single plain URL yields exactly that URL.
     */
    public function testUrlExtraction(): void
    {
        $urlTransformer = new UrlTransformerService();

        $urls = $urlTransformer->extractUrls('https://www.test.com');
        $this->assertCount(1, $urls);
        $this->assertSame('https://www.test.com', $urls[0]);
    }

    /**
     * Amazon URLs get the configured partner id appended as "tag" parameter,
     * using "?" or "&" depending on whether a query string is already present.
     */
    public function testAmazonUrlExtraction(): void
    {
        // The service reads $_ENV in its constructor; remember the previous state so
        // this test does not leak its partner id into other tests.
        $hadPartnerId = array_key_exists('PARTNER_AMAZON', $_ENV);
        $previousPartnerId = $_ENV['PARTNER_AMAZON'] ?? null;
        $_ENV['PARTNER_AMAZON'] = 'abc-123';

        try {
            $urlTransformer = new UrlTransformerService();

            $url = $urlTransformer->transformUrl('https://www.amazon.com/Zmart-Funny-Christmas-Coworkers-Secret/dp/B0CC1S12S3');
            $this->assertSame('https://www.amazon.com/Zmart-Funny-Christmas-Coworkers-Secret/dp/B0CC1S12S3?tag=abc-123', $url);

            $url = $urlTransformer->transformUrl('https://www.amazon.com/Zmart-Funny-Christmas-Coworkers-Secret/dp/B0CC1S12S3?crid=123456789');
            $this->assertSame('https://www.amazon.com/Zmart-Funny-Christmas-Coworkers-Secret/dp/B0CC1S12S3?crid=123456789&tag=abc-123', $url);
        } finally {
            if ($hadPartnerId) {
                $_ENV['PARTNER_AMAZON'] = $previousPartnerId;
            } else {
                unset($_ENV['PARTNER_AMAZON']);
            }
        }
    }

    /**
     * Replacements must be applied to the original text only: neither the output of an
     * earlier replacement nor the parent part of a child URL may be replaced again.
     */
    public function testurlReplacement(): void
    {
        $urlTransformer = new UrlTransformerService();

        $html = 'Multiple links to https://www.tom.be, again https://www.tom.be and a child https://www.tom.be/zeb and a child https://www.tom.be/arne';
        $replacements = [
            'https://www.tom.be' => 'https://www.tom.be/zeb',
            'https://www.tom.be/zeb' => 'https://www.tom.be/Zeb',
            'https://www.tom.be/arne' => 'https://www.tom.be/Arne',
        ];
        // It should not replace the /zeb instances that are the result of the replacements of https://www.tom.be.
        // It should also not replace https://www.tom.be in any of the other child urls, but instead replace them with the capital names.
        $expectedHtml = 'Multiple links to https://www.tom.be/zeb, again https://www.tom.be/zeb and a child https://www.tom.be/Zeb and a child https://www.tom.be/Arne';

        $actual = $urlTransformer->replaceUrls($html, $replacements);
        $this->assertSame($expectedHtml, $actual);
    }
}
5 changes: 3 additions & 2 deletions tests/php/unit/Twig/LinkifyExtensionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace App\Tests\Unit\Twig;

use App\Service\UrlTransformerService;
use App\Twig\LinkifyExtension;
use PHPUnit\Framework\TestCase;

Expand All @@ -14,7 +15,7 @@ class LinkifyExtensionTest extends TestCase
*/
public function itShouldWrapAnHtmlLinkInAnAnchorTag($rawHtml, $anchorHtml)
{
    // Build the extension with a real transformer service and run the filter once.
    $transformer = new UrlTransformerService();
    $extension = new LinkifyExtension($transformer);
    $linkified = $extension->linkifyFilter($rawHtml);

    $this->assertEquals($anchorHtml, $linkified);
}
Expand All @@ -36,7 +37,7 @@ public function htmlProvider()
],
[
'www.test.com',
'<a href="http://www.test.com" target="_blank" rel="noopener noreferrer">www.test.com</a>',
'www.test.com',
],
[
'http://test',
Expand Down

0 comments on commit 3843db3

Please sign in to comment.