From 7e78cb35b8a0870eaf3c29575c82f7cac1c493d0 Mon Sep 17 00:00:00 2001 From: Lars Moelleken Date: Sat, 23 Nov 2019 03:55:43 +0100 Subject: [PATCH 1/3] [+]: updates from my fork - support for more unicode chars - language-specific stop-words (disabled by default) - currency support - php7 types --- URLify.php | 556 ++++++++++++++++++++++++++++++++++++------- composer.json | 3 +- tests/URLifyTest.php | 12 +- 3 files changed, 475 insertions(+), 96 deletions(-) diff --git a/URLify.php b/URLify.php index fdf0f43..90835e8 100644 --- a/URLify.php +++ b/URLify.php @@ -1,20 +1,15 @@ An array of things that should replaced by the separator.

+ * @param bool $merge

Keep the previous (default) array-to-separator array.

+ */ + public static function add_array_to_separator(array $array, bool $merge = true) + { + if ($merge === true) { + self::$arrayToSeparator = \array_unique( + \array_merge( + self::$arrayToSeparator, + $array + ) + ); + } else { + self::$arrayToSeparator = $array; + } + } + + /** + * Add new characters to the list. `$map` should be a hash. + * + * @param array $map * @param string|null $language - */ - public static function add_chars ($map, string $language = null) + */ + public static function add_chars(array $map, string $language = null) { - $language_key = $language ?? uniqid('urlify', true); + $language_key = $language ?? \uniqid('urlify', true); if (isset(self::$maps[$language_key])) { - self::$maps[$language_key] = array_merge($map, self::$maps[$language_key]); + self::$maps[$language_key] = \array_merge($map, self::$maps[$language_key]); } else { self::$maps[$language_key] = $map; } - } + } - /** - * Append words to the remove list. Accepts either single words - * or an array of words. - * @param mixed $words - */ - public static function remove_words ($words) - { - $words = is_array ($words) ? $words : array ($words); - self::$remove_list = array_unique (array_merge (self::$remove_list, $words)); - } - - /** - * Transliterates characters to their ASCII equivalents. + /** + * Transliterates characters to their ASCII equivalents. * $language specifies a priority for a specific language. * The latter is useful if languages have different rules for the same character. - * @param string $text - * @param string $language + * + * @param string $string

The input string.

+ * @param string $language

Your primary language.

+ * @param string $unknown

Character use if character unknown. (default is ?).

+ * * @return string - */ - public static function downcode ($text, $language = "") - { + */ + public static function downcode( + string $string, + string $language = 'en', + string $unknown = '' + ): string { + + $string = self::expandString($string, $language); + foreach (self::$maps as $mapsInner) { foreach ($mapsInner as $orig => $replace) { - $text = str_replace($orig, $replace, $text); + $string = \str_replace($orig, $replace, $string); } } $langSpecific = \voku\helper\ASCII::charsArrayWithOneLanguage($language, true); if (!empty($langSpecific)) { - $text = str_replace( + $string = \str_replace( $langSpecific['orig'], $langSpecific['replace'], - $text + $string ); } + foreach (\voku\helper\ASCII::charsArrayWithMultiLanguageValues(true) as $replace => $orig) { - $text = str_replace($orig, $replace, $text); + $string = \str_replace($orig, $replace, $string); } - return $text; - } + return \voku\helper\ASCII::to_transliterate($string, $unknown, false); + } - /** - * Filters a string, e.g., "Petty theft" to "petty-theft" - * @param string $text The text to return filtered - * @param int $length The length (after filtering) of the string to be returned - * @param string $language The transliteration language, passed down to downcode() - * @param bool $file_name Whether there should be and additional filter considering this is a filename - * @param bool $use_remove_list Whether you want to remove specific elements previously set in self::$remove_list - * @param bool $lower_case Whether you want the filter to maintain casing or lowercase everything (default) - * @param bool $treat_underscore_as_space Treat underscore as space, so it will replaced with "-" + /** + * Convert a String to URL. + * + * e.g.: "Petty
theft" to "Petty-theft" + * + * @param string $string

The text you want to convert.

+ * @param int $maxLength

Max. length of the output string, set to "0" (zero) to + * disable it

+ * @param string $language

The language you want to convert to.

+ * @param bool $fileName

+ * Keep the "." from the extension e.g.: "imaäe.jpg" => + * "image.jpg" + *

+ * @param bool $removeWords

+ * Remove some "words" from the string.
+ * Info: Set extra words via remove_words(). + *

+ * @param bool $strToLower

Use strtolower() at the end.

+ * @param bool|string $separator

Define a new separator for the words.

+ * * @return string - */ - public static function filter ($text, $length = 60, $language = "", $file_name = false, $use_remove_list = true, $lower_case = true, $treat_underscore_as_space = true) + */ + public static function filter( + string $string, + int $maxLength = 200, + string $language = 'en', + bool $fileName = false, + bool $removeWords = false, + bool $strToLower = true, + $separator = '-' + ): string { + if ($string === '') { + return ''; + } + + // fallback + if ($language === '') { + $language = 'en'; + } + + // separator-fallback + if ($separator === false) { + $separator = '_'; + } + if ($separator === true || $separator === '') { + $separator = '-'; + } + + // escaped separator + $separatorEscaped = \preg_quote($separator, '/'); + + // use defaults, if there are no values + if (self::$arrayToSeparator === []) { + self::reset_array_to_separator(); + } + + // remove apostrophes which are not used as quotes around a string + $stringTmp = \preg_replace("/(\w)'(\w)/u", '${1}${2}', $string); + if ($stringTmp !== null) { + $string = (string) $stringTmp; + } + + // replace with $separator + // + + // remove all other html-tags + $string = \strip_tags( + (string) \preg_replace( + self::$arrayToSeparator, + $separator, + $string + ) + ); + + // use special language replacer + $string = self::downcode($string, $language); + + // replace with $separator, again + $string = (string) \preg_replace( + self::$arrayToSeparator, + $separator, + $string + ); + + // remove all these words from the string before urlifying + $removeWordsSearch = '//'; + if ($removeWords === true) { + $removeList = self::get_remove_list($language); + if ($removeList !== []) { + $removeWordsSearch = '/\b(?:' . \implode('|', $removeList) . ')\b/ui'; + } + } + + // keep the "." from e.g.: a file-extension? + if ($fileName) { + $removePatternAddOn = '.'; + } else { + $removePatternAddOn = ''; + } + + $string = (string) \preg_replace( + [ + '/[^' . $separatorEscaped . $removePatternAddOn . 
'\-a-zA-Z0-9\s]/u', + // 1) remove un-needed chars + '/[\s]+/u', + // 2) convert spaces to $separator + $removeWordsSearch, + // 3) remove some extras words + '/[' . ($separatorEscaped ?: ' ') . ']+/u', + // 4) remove double $separator's + '/[' . ($separatorEscaped ?: ' ') . ']+$/u', + // 5) remove $separator at the end + ], + [ + '', + $separator, + '', + $separator, + '', + ], + $string + ); + + // "substr" only if "$length" is set + if ( + $maxLength + && + $maxLength > 0 + && + \strlen($string) > $maxLength + ) { + $string = (string) \substr(\trim($string, $separator), 0, $maxLength); + } + + // convert to lowercase + if ($strToLower === true) { + $string = \strtolower($string); + } + + // trim "$separator" from beginning and end of the string + return \trim($string, $separator); + } + + /** + * Append words to the remove list. Accepts either single words or an array of words. + * + * @param string|string[] $words + * @param string $language + * @param bool $merge

Keep the previous (default) remove-words array.

+ */ + public static function remove_words($words, string $language = 'en', bool $merge = true) { - $text = self::downcode ($text,$language); - - if ($use_remove_list) { - // remove all these words from the string before urlifying - $text = preg_replace ('/\b(' . implode ('|', self::$remove_list) . ')\b/i', '', $text); - } - - // if downcode doesn't hit, the char will be stripped here - $remove_pattern = ($file_name) ? '/[^_\-.\-a-zA-Z0-9\s]/u' : '/[^\s_\-a-zA-Z0-9]/u'; - $text = preg_replace ($remove_pattern, '', $text); // remove unneeded chars - if ($treat_underscore_as_space) { - $text = str_replace ('_', ' ', $text); // treat underscores as spaces - } - $text = preg_replace ('/^\s+|\s+$/u', '', $text); // trim leading/trailing spaces - $text = preg_replace ('/[-\s]+/u', '-', $text); // convert spaces to hyphens - if ($lower_case) { - $text = strtolower ($text); // convert to lowercase - } - - return trim (substr ($text, 0, $length), '-'); // trim to first $length chars - } - - /** - * Alias of `URLify::downcode()`. - */ - public static function transliterate ($text) + if (\is_array($words) === false) { + $words = [$words]; + } + + /** @noinspection ForeachSourceInspection */ + foreach ($words as $removeWordKey => $removeWord) { + $words[$removeWordKey] = \preg_quote($removeWord, '/'); + } + + if ($merge === true) { + self::$remove_list[$language] = \array_unique( + \array_merge( + self::get_remove_list($language), + $words + ) + ); + } else { + self::$remove_list[$language] = $words; + } + } + + /** + * Reset the internal "self::$arrayToSeparator" to the default values. 
+ */ + public static function reset_array_to_separator() { - return self::downcode ($text); - } + self::$arrayToSeparator = [ + '/"|&|<|>|–|—/i', // ", &, <, >, –, — + '/⁻|-|—|_|"|`|´|\'/', + "#/\r\n|\r|\n|#isU", + ]; + } + + /** + * @param string $language + * + * @return string + */ + private static function get_language_for_reset_remove_list(string $language): string + { + if ($language === '') { + return ''; + } + + if ( + \strpos($language, '_') === false + && + \strpos($language, '-') === false + ) { + $language = \strtolower($language); + } else { + $regex = '/(?[a-z]{2}).*/i'; + $language = \strtolower((string) \preg_replace($regex, '$1', $language)); + } + + return $language; + } + + /** + * reset the word-remove-array + * + * @param string $language + */ + public static function reset_remove_list(string $language = 'en') + { + if ($language === '') { + return; + } + + $language_orig = $language; + $language = self::get_language_for_reset_remove_list($language); + if ($language === '') { + return; + } + + $stopWords = new \voku\helper\StopWords(); + + try { + self::$remove_list[$language_orig] = $stopWords->getStopWordsFromLanguage($language); + } catch (\voku\helper\StopWordsLanguageNotExists $e) { + self::$remove_list[$language_orig] = []; + } + } + + /** + * Alias of `URLify::downcode()`. + * + * @param string $string + * @param string $language + * + * @return string + */ + public static function transliterate(string $string, string $language = 'en'): string + { + return self::downcode($string, $language); + } + + /** + * Expands the given string replacing some special parts for words. + * e.g. "lorem@ipsum.com" is replaced by "lorem at ipsum dot com". + * + * Most of these transformations have been inspired by the pelle/slugger + * project, distributed under the Eclipse Public License. 
+ * Copyright 2012 Pelle Braendgaard + * + * @param string $string The string to expand + * @param string $language + * + * @return string The result of expanding the string + */ + protected static function expandString(string $string, string $language = 'en'): string + { + $string = self::expandCurrencies($string, $language); + + return self::expandSymbols($string, $language); + } + + /** + * Expands the numeric currencies in euros, dollars, pounds + * and yens that the given string may include. + * + * @param string $string + * @param string $language + * + * @return string + */ + private static function expandCurrencies(string $string, string $language = 'en'): string + { + if ($language === 'de') { + return (string) \preg_replace( + [ + '/(?:\s|^)(\d+)(?: )*€(?:\s|$)/', + '/(?:\s|^)\$(?: )*(\d+)(?:\s|$)/', + '/(?:\s|^)£(?: )*(\d+)(?:\s|$)/', + '/(?:\s|^)¥(?: )*(\d+)(?:\s|$)/', + '/(?:\s|^)(\d+)[.|,](\d+)(?: )*€(?:\s|$)/', + '/(?:\s|^)\$(?: )*(\d+)[.|,](\d+)(?:\s|$)/', + '/(?:\s|^)£(?: )*(\d+)[.|,](\d+)(?:\s|$)/', + ], + [ + ' \1 Euro ', + ' \1 Dollar ', + ' \1 Pound ', + ' \1 Yen ', + ' \1 Euro \2 Cent ', + ' \1 Dollar \2 Cent ', + ' \1 Pound \2 Pence ', + ], + $string + ); + } + + return (string) \preg_replace( + [ + '/(?:\s|^)1(?: )*€(?:\s|$)/', + '/(?:\s|^)(\d+)(?: )*€(?:\s|$)/', + '/(?:\s|^)\$(?: )*1(?:\s|$)/', + '/(?:\s|^)\$(?: )*(\d+)(?:\s|$)/', + '/(?:\s|^)£(?: )*1(?:\s|$)/', + '/(?:\s|^)£(?: )*(\d+)(?:\s|$)/', + '/(?:\s|^)¥(?: )*(\d+)(?:\s|$)/', + '/(?:\s|^)1[.|,](\d+)(?: )*€(?:\s|$)/', + '/(?:\s|^)(\d+)[.|,](\d+)(?: )*€(?:\s|$)/', + '/(?:\s|^)1[.|,](\d+)(?: )*$(?:\s|$)/', + '/(?:\s|^)\$(?: )*(\d+)[.|,](\d+)(?:\s|$)/', + '/(?:\s|^)1[.|,](\d+)(?: )*£(?:\s|$)/', + '/(?:\s|^)£(?: )*(\d+)[.|,](\d+)(?:\s|$)/', + ], + [ + ' 1 Euro ', + ' \1 Euros ', + ' 1 Dollar ', + ' \1 Dollars ', + ' 1 Pound ', + ' \1 Pounds ', + ' \1 Yen ', + ' 1 Euros \1 Cents ', + ' \1 Euros \2 Cents ', + ' 1 Dollars \1 Cents ', + ' \1 Dollars \2 Cents ', + ' 1 Pounds \1 Pence ', + ' \1 
Pounds \2 Pence ', + ], + $string + ); + } + + /** + * Expands the special symbols that the given string may include, such as '@', '.', '#' and '%'. + * + * @param string $string + * @param string $language + * + * @return string + */ + private static function expandSymbols(string $string, string $language = 'en'): string + { + $maps = \voku\helper\ASCII::charsArray(true); + + return (string) \preg_replace( + [ + '/\s*©\s*/', + '/\s*®\s*/', + '/\s*@\s*/', + '/\s*&\s*/', + '/\s*%\s*/', + '/(\s*=\s*)/', + ], + [ + $maps['latin_symbols']['©'], + $maps['latin_symbols']['®'], + $maps['latin_symbols']['@'], + $maps[$language]['&'] ?? '&', + $maps[$language]['%'] ?? '%', + $maps[$language]['='] ?? '=', + ], + $string + ); + } + + /** + * return the "self::$remove_list[$language]" array + * + * @param string $language + * + * @return array + */ + private static function get_remove_list(string $language = 'en'): array + { + // check for language + if ($language === '') { + return []; + } + + // set remove-array + if (!isset(self::$remove_list[$language])) { + self::reset_remove_list($language); + } + + // check for array + if ( + !isset(self::$remove_list[$language]) + || + empty(self::$remove_list[$language]) + ) { + return []; + } + + return self::$remove_list[$language]; + } } diff --git a/composer.json b/composer.json index ad51983..3fcfaa1 100644 --- a/composer.json +++ b/composer.json @@ -14,7 +14,8 @@ ], "require": { "php": ">=7.0.0", - "voku/portable-ascii": "^1.3" + "voku/portable-ascii": "^1.3", + "voku/stop-words": "^2.0" }, "require-dev": { "phpunit/phpunit": "~6.0 || ~7.0" diff --git a/tests/URLifyTest.php b/tests/URLifyTest.php index a9f626e..4f49fcd 100644 --- a/tests/URLifyTest.php +++ b/tests/URLifyTest.php @@ -14,10 +14,10 @@ function test_filter () { $this->assertEquals ('jetudie-le-francais', URLify::filter (' J\'étudie le français ')); $this->assertEquals ('lo-siento-no-hablo-espanol', URLify::filter ('Lo siento, no hablo español.')); 
$this->assertEquals ('fkspws', URLify::filter ('ΦΞΠΏΣ')); - $this->assertEquals ('', URLify::filter('大般若經')); - $this->assertEquals ('test-.txt', URLify::filter('test-大般若經.txt', 60, "", $file_name = true)); + $this->assertEquals ('da-ban-ruo-jing', URLify::filter('大般若經')); + $this->assertEquals ('test-da-ban-ruo-jing-.txt', URLify::filter('test-大般若經.txt', 60, "", $file_name = true)); $this->assertEquals ('yakrhy-ltoytr', URLify::filter('ياكرهي لتويتر')); - $this->assertEquals ('saaat', URLify::filter('ساعت ۲۵')); + $this->assertEquals ('saaat-25', URLify::filter('ساعت ۲۵')); $this->assertEquals ('foto.jpg', URLify::filter ('фото.jpg', 60, "", $file_name = true)); // priorization of language-specific maps $this->assertEquals ('aouaou', URLify::filter ('ÄÖÜäöü',60,"tr")); @@ -25,13 +25,13 @@ function test_filter () { $this->assertEquals ('bobby-mcferrin-dont-worry-be-happy', URLify::filter ("Bobby McFerrin — Don't worry be happy",600,"en")); // test stripping and conversion of UTF-8 spaces - $this->assertEquals ('test-mahito-mukai', URLify::filter('向井 真人test (Mahito Mukai)')); + $this->assertEquals ('xiang-jing-zhen-ren-test-mahito-mukai', URLify::filter('向井 真人test (Mahito Mukai)')); // Treat underscore as space $this->assertEquals ('text_with_underscore', URLify::filter('text_with_underscore', 60, "en", true, true, true, false)); } function test_add_chars () { - $this->assertEquals ('¿ (r) ¼ ¼ ¾ ¶', URLify::downcode ('¿ ® ¼ ¼ ¾ ¶')); + $this->assertEquals ('? 
(r) 1/4 1/4 3/4 P', URLify::downcode ('¿ ® ¼ ¼ ¾ ¶')); URLify::add_chars (array ( '¿' => '?', '®' => '(r)', '¼' => '1/4', '¼' => '1/2', '¾' => '3/4', '¶' => 'P' @@ -42,7 +42,7 @@ function test_add_chars () { function test_remove_words () { $this->assertEquals ('foo-bar', URLify::filter ('foo bar')); URLify::remove_words (array ('foo', 'bar')); - $this->assertEquals ('', URLify::filter ('foo bar')); + $this->assertEquals ('', URLify::filter ('foo bar', 200, 'en', false, true)); } function test_unknown_language_code () { From ace9bf9ab7a635ca3aee9c53024d31a2f25c5cab Mon Sep 17 00:00:00 2001 From: Lars Moelleken Date: Sat, 23 Nov 2019 04:41:32 +0100 Subject: [PATCH 2/3] [+]: add tests --- .editorconfig | 805 ++++++++++++++++++ .gitattributes | 6 + .travis.yml | 2 + URLify.php | 9 +- phpcs.php_cs | 238 ++++++ phpstan.neon | 8 + scripts/downcode.php | 27 +- scripts/filter.php | 27 +- scripts/transliterate.php | 27 +- tests/BaseSluggerTest.php | 110 +++ tests/SeoSluggerTest.php | 20 + tests/SeoUtf8SluggerTest.php | 21 + tests/SluggerTest.php | 32 + tests/URLifyOrigTest.php | 67 ++ tests/URLifyTest.php | 503 +++++++++-- tests/Utf8SluggerTest.php | 26 + tests/Utf8UrlSlugTest.php | 77 ++ tests/bootstrap.php | 11 +- tests/fixtures/urlify/expected/arabic.txt | 17 + tests/fixtures/urlify/expected/hebrew.txt | 6 + .../fixtures/urlify/expected/iso-8859-1-1.txt | 12 + tests/fixtures/urlify/expected/iso-8859-1.txt | 12 + .../fixtures/urlify/expected/iso-8859-2-1.txt | 12 + tests/fixtures/urlify/expected/iso-8859-2.txt | 12 + .../fixtures/urlify/expected/iso-8859-3-1.txt | 12 + tests/fixtures/urlify/expected/iso-8859-3.txt | 12 + .../fixtures/urlify/expected/iso-8859-4-1.txt | 12 + tests/fixtures/urlify/expected/iso-8859-4.txt | 12 + tests/fixtures/urlify/expected/japanese.txt | 167 ++++ tests/fixtures/urlify/expected/pangrams-1.txt | 26 + tests/fixtures/urlify/expected/pangrams.txt | 26 + .../urlify/expected/sample-unicode-chart.txt | 84 ++ .../urlify/expected/sample-utf-8-bom.txt 
| 179 ++++ tests/fixtures/urlify/expected/strings-1.txt | 20 + tests/fixtures/urlify/expected/strings-2.txt | 15 + tests/fixtures/urlify/expected/strings-3.txt | 15 + tests/fixtures/urlify/input/arabic.txt | 17 + tests/fixtures/urlify/input/hebrew.txt | 6 + tests/fixtures/urlify/input/iso-8859-1-1.txt | 12 + tests/fixtures/urlify/input/iso-8859-1.txt | 12 + tests/fixtures/urlify/input/iso-8859-2-1.txt | 12 + tests/fixtures/urlify/input/iso-8859-2.txt | 12 + tests/fixtures/urlify/input/iso-8859-3-1.txt | 12 + tests/fixtures/urlify/input/iso-8859-3.txt | 12 + tests/fixtures/urlify/input/iso-8859-4-1.txt | 12 + tests/fixtures/urlify/input/iso-8859-4.txt | 12 + tests/fixtures/urlify/input/japanese.txt | 167 ++++ tests/fixtures/urlify/input/pangrams-1.txt | 26 + tests/fixtures/urlify/input/pangrams.txt | 26 + .../urlify/input/sample-unicode-chart.txt | 84 ++ .../urlify/input/sample-utf-8-bom.txt | 179 ++++ tests/fixtures/urlify/input/strings-1.txt | 20 + tests/fixtures/urlify/input/strings-2.txt | 15 + tests/fixtures/urlify/input/strings-3.txt | 15 + tests/profile.php | 8 + 55 files changed, 3227 insertions(+), 99 deletions(-) create mode 100644 .editorconfig create mode 100644 .gitattributes create mode 100644 phpcs.php_cs create mode 100644 phpstan.neon create mode 100644 tests/BaseSluggerTest.php create mode 100644 tests/SeoSluggerTest.php create mode 100644 tests/SeoUtf8SluggerTest.php create mode 100644 tests/SluggerTest.php create mode 100644 tests/URLifyOrigTest.php create mode 100644 tests/Utf8SluggerTest.php create mode 100644 tests/Utf8UrlSlugTest.php create mode 100644 tests/fixtures/urlify/expected/arabic.txt create mode 100644 tests/fixtures/urlify/expected/hebrew.txt create mode 100644 tests/fixtures/urlify/expected/iso-8859-1-1.txt create mode 100644 tests/fixtures/urlify/expected/iso-8859-1.txt create mode 100644 tests/fixtures/urlify/expected/iso-8859-2-1.txt create mode 100644 tests/fixtures/urlify/expected/iso-8859-2.txt create mode 100644 
tests/fixtures/urlify/expected/iso-8859-3-1.txt create mode 100644 tests/fixtures/urlify/expected/iso-8859-3.txt create mode 100644 tests/fixtures/urlify/expected/iso-8859-4-1.txt create mode 100644 tests/fixtures/urlify/expected/iso-8859-4.txt create mode 100644 tests/fixtures/urlify/expected/japanese.txt create mode 100644 tests/fixtures/urlify/expected/pangrams-1.txt create mode 100644 tests/fixtures/urlify/expected/pangrams.txt create mode 100644 tests/fixtures/urlify/expected/sample-unicode-chart.txt create mode 100644 tests/fixtures/urlify/expected/sample-utf-8-bom.txt create mode 100644 tests/fixtures/urlify/expected/strings-1.txt create mode 100644 tests/fixtures/urlify/expected/strings-2.txt create mode 100644 tests/fixtures/urlify/expected/strings-3.txt create mode 100644 tests/fixtures/urlify/input/arabic.txt create mode 100644 tests/fixtures/urlify/input/hebrew.txt create mode 100644 tests/fixtures/urlify/input/iso-8859-1-1.txt create mode 100644 tests/fixtures/urlify/input/iso-8859-1.txt create mode 100644 tests/fixtures/urlify/input/iso-8859-2-1.txt create mode 100644 tests/fixtures/urlify/input/iso-8859-2.txt create mode 100644 tests/fixtures/urlify/input/iso-8859-3-1.txt create mode 100644 tests/fixtures/urlify/input/iso-8859-3.txt create mode 100644 tests/fixtures/urlify/input/iso-8859-4-1.txt create mode 100644 tests/fixtures/urlify/input/iso-8859-4.txt create mode 100644 tests/fixtures/urlify/input/japanese.txt create mode 100644 tests/fixtures/urlify/input/pangrams-1.txt create mode 100644 tests/fixtures/urlify/input/pangrams.txt create mode 100644 tests/fixtures/urlify/input/sample-unicode-chart.txt create mode 100644 tests/fixtures/urlify/input/sample-utf-8-bom.txt create mode 100644 tests/fixtures/urlify/input/strings-1.txt create mode 100644 tests/fixtures/urlify/input/strings-2.txt create mode 100644 tests/fixtures/urlify/input/strings-3.txt create mode 100644 tests/profile.php diff --git a/.editorconfig b/.editorconfig new file mode 100644 
index 0000000..ab93fd4 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,805 @@ +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = false +max_line_length = 120 +tab_width = 4 +ij_continuation_indent_size = 8 +ij_formatter_off_tag = @formatter:off +ij_formatter_on_tag = @formatter:on +ij_formatter_tags_enabled = false +ij_smart_tabs = false +ij_wrap_on_typing = false + +[*.blade.php] +ij_blade_keep_indents_on_empty_lines = false + +[*.css] +ij_css_align_closing_brace_with_properties = false +ij_css_blank_lines_around_nested_selector = 1 +ij_css_blank_lines_between_blocks = 1 +ij_css_brace_placement = 0 +ij_css_enforce_quotes_on_format = false +ij_css_hex_color_long_format = false +ij_css_hex_color_lower_case = false +ij_css_hex_color_short_format = false +ij_css_hex_color_upper_case = false +ij_css_keep_blank_lines_in_code = 2 +ij_css_keep_indents_on_empty_lines = false +ij_css_keep_single_line_blocks = false +ij_css_properties_order = font,font-family,font-size,font-weight,font-style,font-variant,font-size-adjust,font-stretch,line-height,position,z-index,top,right,bottom,left,display,visibility,float,clear,overflow,overflow-x,overflow-y,clip,zoom,align-content,align-items,align-self,flex,flex-flow,flex-basis,flex-direction,flex-grow,flex-shrink,flex-wrap,justify-content,order,box-sizing,width,min-width,max-width,height,min-height,max-height,margin,margin-top,margin-right,margin-bottom,margin-left,padding,padding-top,padding-right,padding-bottom,padding-left,table-layout,empty-cells,caption-side,border-spacing,border-collapse,list-style,list-style-position,list-style-type,list-style-image,content,quotes,counter-reset,counter-increment,resize,cursor,user-select,nav-index,nav-up,nav-right,nav-down,nav-left,transition,transition-delay,transition-timing-function,transition-duration,transition-property,transform,transform-origin,animation,animation-name,animation-duration,animation-play-state,animation-timing-function,animati
on-delay,animation-iteration-count,animation-direction,text-align,text-align-last,vertical-align,white-space,text-decoration,text-emphasis,text-emphasis-color,text-emphasis-style,text-emphasis-position,text-indent,text-justify,letter-spacing,word-spacing,text-outline,text-transform,text-wrap,text-overflow,text-overflow-ellipsis,text-overflow-mode,word-wrap,word-break,tab-size,hyphens,pointer-events,opacity,color,border,border-width,border-style,border-color,border-top,border-top-width,border-top-style,border-top-color,border-right,border-right-width,border-right-style,border-right-color,border-bottom,border-bottom-width,border-bottom-style,border-bottom-color,border-left,border-left-width,border-left-style,border-left-color,border-radius,border-top-left-radius,border-top-right-radius,border-bottom-right-radius,border-bottom-left-radius,border-image,border-image-source,border-image-slice,border-image-width,border-image-outset,border-image-repeat,outline,outline-width,outline-style,outline-color,outline-offset,background,background-color,background-image,background-repeat,background-attachment,background-position,background-position-x,background-position-y,background-clip,background-origin,background-size,box-decoration-break,box-shadow,text-shadow +ij_css_space_after_colon = true +ij_css_space_before_opening_brace = true +ij_css_use_double_quotes = true +ij_css_value_alignment = 0 + +[*.csv] +max_line_length = 2147483647 +ij_csv_wrap_long_lines = false + +[*.feature] +indent_size = 2 +ij_gherkin_keep_indents_on_empty_lines = false + +[*.haml] +indent_size = 2 +ij_haml_keep_indents_on_empty_lines = false + +[*.less] +indent_size = 2 +ij_less_align_closing_brace_with_properties = false +ij_less_blank_lines_around_nested_selector = 1 +ij_less_blank_lines_between_blocks = 1 +ij_less_brace_placement = 0 +ij_less_enforce_quotes_on_format = false +ij_less_hex_color_long_format = false +ij_less_hex_color_lower_case = false +ij_less_hex_color_short_format = false 
+ij_less_hex_color_upper_case = false +ij_less_keep_blank_lines_in_code = 2 +ij_less_keep_indents_on_empty_lines = false +ij_less_keep_single_line_blocks = false +ij_less_properties_order = font,font-family,font-size,font-weight,font-style,font-variant,font-size-adjust,font-stretch,line-height,position,z-index,top,right,bottom,left,display,visibility,float,clear,overflow,overflow-x,overflow-y,clip,zoom,align-content,align-items,align-self,flex,flex-flow,flex-basis,flex-direction,flex-grow,flex-shrink,flex-wrap,justify-content,order,box-sizing,width,min-width,max-width,height,min-height,max-height,margin,margin-top,margin-right,margin-bottom,margin-left,padding,padding-top,padding-right,padding-bottom,padding-left,table-layout,empty-cells,caption-side,border-spacing,border-collapse,list-style,list-style-position,list-style-type,list-style-image,content,quotes,counter-reset,counter-increment,resize,cursor,user-select,nav-index,nav-up,nav-right,nav-down,nav-left,transition,transition-delay,transition-timing-function,transition-duration,transition-property,transform,transform-origin,animation,animation-name,animation-duration,animation-play-state,animation-timing-function,animation-delay,animation-iteration-count,animation-direction,text-align,text-align-last,vertical-align,white-space,text-decoration,text-emphasis,text-emphasis-color,text-emphasis-style,text-emphasis-position,text-indent,text-justify,letter-spacing,word-spacing,text-outline,text-transform,text-wrap,text-overflow,text-overflow-ellipsis,text-overflow-mode,word-wrap,word-break,tab-size,hyphens,pointer-events,opacity,color,border,border-width,border-style,border-color,border-top,border-top-width,border-top-style,border-top-color,border-right,border-right-width,border-right-style,border-right-color,border-bottom,border-bottom-width,border-bottom-style,border-bottom-color,border-left,border-left-width,border-left-style,border-left-color,border-radius,border-top-left-radius,border-top-right-radius,border-bott
om-right-radius,border-bottom-left-radius,border-image,border-image-source,border-image-slice,border-image-width,border-image-outset,border-image-repeat,outline,outline-width,outline-style,outline-color,outline-offset,background,background-color,background-image,background-repeat,background-attachment,background-position,background-position-x,background-position-y,background-clip,background-origin,background-size,box-decoration-break,box-shadow,text-shadow +ij_less_space_after_colon = true +ij_less_space_before_opening_brace = true +ij_less_use_double_quotes = true +ij_less_value_alignment = 0 + +[*.sass] +indent_size = 2 +ij_sass_align_closing_brace_with_properties = false +ij_sass_blank_lines_around_nested_selector = 1 +ij_sass_blank_lines_between_blocks = 1 +ij_sass_brace_placement = 0 +ij_sass_enforce_quotes_on_format = false +ij_sass_hex_color_long_format = false +ij_sass_hex_color_lower_case = false +ij_sass_hex_color_short_format = false +ij_sass_hex_color_upper_case = false +ij_sass_keep_blank_lines_in_code = 2 +ij_sass_keep_indents_on_empty_lines = false +ij_sass_keep_single_line_blocks = false +ij_sass_properties_order = 
font,font-family,font-size,font-weight,font-style,font-variant,font-size-adjust,font-stretch,line-height,position,z-index,top,right,bottom,left,display,visibility,float,clear,overflow,overflow-x,overflow-y,clip,zoom,align-content,align-items,align-self,flex,flex-flow,flex-basis,flex-direction,flex-grow,flex-shrink,flex-wrap,justify-content,order,box-sizing,width,min-width,max-width,height,min-height,max-height,margin,margin-top,margin-right,margin-bottom,margin-left,padding,padding-top,padding-right,padding-bottom,padding-left,table-layout,empty-cells,caption-side,border-spacing,border-collapse,list-style,list-style-position,list-style-type,list-style-image,content,quotes,counter-reset,counter-increment,resize,cursor,user-select,nav-index,nav-up,nav-right,nav-down,nav-left,transition,transition-delay,transition-timing-function,transition-duration,transition-property,transform,transform-origin,animation,animation-name,animation-duration,animation-play-state,animation-timing-function,animation-delay,animation-iteration-count,animation-direction,text-align,text-align-last,vertical-align,white-space,text-decoration,text-emphasis,text-emphasis-color,text-emphasis-style,text-emphasis-position,text-indent,text-justify,letter-spacing,word-spacing,text-outline,text-transform,text-wrap,text-overflow,text-overflow-ellipsis,text-overflow-mode,word-wrap,word-break,tab-size,hyphens,pointer-events,opacity,color,border,border-width,border-style,border-color,border-top,border-top-width,border-top-style,border-top-color,border-right,border-right-width,border-right-style,border-right-color,border-bottom,border-bottom-width,border-bottom-style,border-bottom-color,border-left,border-left-width,border-left-style,border-left-color,border-radius,border-top-left-radius,border-top-right-radius,border-bottom-right-radius,border-bottom-left-radius,border-image,border-image-source,border-image-slice,border-image-width,border-image-outset,border-image-repeat,outline,outline-width,outline-style,o
utline-color,outline-offset,background,background-color,background-image,background-repeat,background-attachment,background-position,background-position-x,background-position-y,background-clip,background-origin,background-size,box-decoration-break,box-shadow,text-shadow +ij_sass_space_after_colon = true +ij_sass_space_before_opening_brace = true +ij_sass_use_double_quotes = true +ij_sass_value_alignment = 0 + +[*.scss] +indent_size = 2 +ij_scss_align_closing_brace_with_properties = false +ij_scss_blank_lines_around_nested_selector = 1 +ij_scss_blank_lines_between_blocks = 1 +ij_scss_brace_placement = 0 +ij_scss_enforce_quotes_on_format = false +ij_scss_hex_color_long_format = false +ij_scss_hex_color_lower_case = false +ij_scss_hex_color_short_format = false +ij_scss_hex_color_upper_case = false +ij_scss_keep_blank_lines_in_code = 2 +ij_scss_keep_indents_on_empty_lines = false +ij_scss_keep_single_line_blocks = false +ij_scss_properties_order = font,font-family,font-size,font-weight,font-style,font-variant,font-size-adjust,font-stretch,line-height,position,z-index,top,right,bottom,left,display,visibility,float,clear,overflow,overflow-x,overflow-y,clip,zoom,align-content,align-items,align-self,flex,flex-flow,flex-basis,flex-direction,flex-grow,flex-shrink,flex-wrap,justify-content,order,box-sizing,width,min-width,max-width,height,min-height,max-height,margin,margin-top,margin-right,margin-bottom,margin-left,padding,padding-top,padding-right,padding-bottom,padding-left,table-layout,empty-cells,caption-side,border-spacing,border-collapse,list-style,list-style-position,list-style-type,list-style-image,content,quotes,counter-reset,counter-increment,resize,cursor,user-select,nav-index,nav-up,nav-right,nav-down,nav-left,transition,transition-delay,transition-timing-function,transition-duration,transition-property,transform,transform-origin,animation,animation-name,animation-duration,animation-play-state,animation-timing-function,animation-delay,animation-iteration-count,an
imation-direction,text-align,text-align-last,vertical-align,white-space,text-decoration,text-emphasis,text-emphasis-color,text-emphasis-style,text-emphasis-position,text-indent,text-justify,letter-spacing,word-spacing,text-outline,text-transform,text-wrap,text-overflow,text-overflow-ellipsis,text-overflow-mode,word-wrap,word-break,tab-size,hyphens,pointer-events,opacity,color,border,border-width,border-style,border-color,border-top,border-top-width,border-top-style,border-top-color,border-right,border-right-width,border-right-style,border-right-color,border-bottom,border-bottom-width,border-bottom-style,border-bottom-color,border-left,border-left-width,border-left-style,border-left-color,border-radius,border-top-left-radius,border-top-right-radius,border-bottom-right-radius,border-bottom-left-radius,border-image,border-image-source,border-image-slice,border-image-width,border-image-outset,border-image-repeat,outline,outline-width,outline-style,outline-color,outline-offset,background,background-color,background-image,background-repeat,background-attachment,background-position,background-position-x,background-position-y,background-clip,background-origin,background-size,box-decoration-break,box-shadow,text-shadow +ij_scss_space_after_colon = true +ij_scss_space_before_opening_brace = true +ij_scss_use_double_quotes = true +ij_scss_value_alignment = 0 + +[*.twig] +ij_twig_keep_indents_on_empty_lines = false +ij_twig_spaces_inside_delimiters = true +ij_twig_spaces_inside_variable_delimiters = true + +[.editorconfig] +ij_editorconfig_align_group_field_declarations = false +ij_editorconfig_space_after_colon = false +ij_editorconfig_space_after_comma = true +ij_editorconfig_space_before_colon = false +ij_editorconfig_space_before_comma = false +ij_editorconfig_spaces_around_assignment_operators = true + +[{*.ats,*.ts}] +ij_continuation_indent_size = 4 +ij_typescript_align_imports = false +ij_typescript_align_multiline_array_initializer_expression = false 
+ij_typescript_align_multiline_binary_operation = false +ij_typescript_align_multiline_chained_methods = false +ij_typescript_align_multiline_extends_list = false +ij_typescript_align_multiline_for = true +ij_typescript_align_multiline_parameters = true +ij_typescript_align_multiline_parameters_in_calls = false +ij_typescript_align_multiline_ternary_operation = false +ij_typescript_align_object_properties = 0 +ij_typescript_align_union_types = false +ij_typescript_align_var_statements = 0 +ij_typescript_array_initializer_new_line_after_left_brace = false +ij_typescript_array_initializer_right_brace_on_new_line = false +ij_typescript_array_initializer_wrap = off +ij_typescript_assignment_wrap = off +ij_typescript_binary_operation_sign_on_next_line = false +ij_typescript_binary_operation_wrap = off +ij_typescript_blacklist_imports = rxjs/Rx,node_modules/**/*,@angular/material,@angular/material/typings/** +ij_typescript_blank_lines_after_imports = 1 +ij_typescript_blank_lines_around_class = 1 +ij_typescript_blank_lines_around_field = 0 +ij_typescript_blank_lines_around_field_in_interface = 0 +ij_typescript_blank_lines_around_function = 1 +ij_typescript_blank_lines_around_method = 1 +ij_typescript_blank_lines_around_method_in_interface = 1 +ij_typescript_block_brace_style = end_of_line +ij_typescript_call_parameters_new_line_after_left_paren = false +ij_typescript_call_parameters_right_paren_on_new_line = false +ij_typescript_call_parameters_wrap = off +ij_typescript_catch_on_new_line = false +ij_typescript_chained_call_dot_on_new_line = true +ij_typescript_class_brace_style = end_of_line +ij_typescript_comma_on_new_line = false +ij_typescript_do_while_brace_force = never +ij_typescript_else_on_new_line = false +ij_typescript_enforce_trailing_comma = keep +ij_typescript_extends_keyword_wrap = off +ij_typescript_extends_list_wrap = off +ij_typescript_field_prefix = _ +ij_typescript_file_name_style = relaxed +ij_typescript_finally_on_new_line = false 
+ij_typescript_for_brace_force = never +ij_typescript_for_statement_new_line_after_left_paren = false +ij_typescript_for_statement_right_paren_on_new_line = false +ij_typescript_for_statement_wrap = off +ij_typescript_force_quote_style = false +ij_typescript_force_semicolon_style = false +ij_typescript_function_expression_brace_style = end_of_line +ij_typescript_if_brace_force = never +ij_typescript_import_merge_members = global +ij_typescript_import_prefer_absolute_path = global +ij_typescript_import_sort_members = true +ij_typescript_import_sort_module_name = false +ij_typescript_import_use_node_resolution = true +ij_typescript_imports_wrap = on_every_item +ij_typescript_indent_case_from_switch = true +ij_typescript_indent_chained_calls = true +ij_typescript_indent_package_children = 0 +ij_typescript_jsdoc_include_types = false +ij_typescript_jsx_attribute_value = braces +ij_typescript_keep_blank_lines_in_code = 2 +ij_typescript_keep_first_column_comment = true +ij_typescript_keep_indents_on_empty_lines = false +ij_typescript_keep_line_breaks = true +ij_typescript_keep_simple_blocks_in_one_line = false +ij_typescript_keep_simple_methods_in_one_line = false +ij_typescript_line_comment_add_space = true +ij_typescript_line_comment_at_first_column = false +ij_typescript_method_brace_style = end_of_line +ij_typescript_method_call_chain_wrap = off +ij_typescript_method_parameters_new_line_after_left_paren = false +ij_typescript_method_parameters_right_paren_on_new_line = false +ij_typescript_method_parameters_wrap = off +ij_typescript_object_literal_wrap = on_every_item +ij_typescript_parentheses_expression_new_line_after_left_paren = false +ij_typescript_parentheses_expression_right_paren_on_new_line = false +ij_typescript_place_assignment_sign_on_next_line = false +ij_typescript_prefer_as_type_cast = false +ij_typescript_prefer_parameters_wrap = false +ij_typescript_reformat_c_style_comments = false +ij_typescript_space_after_colon = true 
+ij_typescript_space_after_comma = true +ij_typescript_space_after_dots_in_rest_parameter = false +ij_typescript_space_after_generator_mult = true +ij_typescript_space_after_property_colon = true +ij_typescript_space_after_quest = true +ij_typescript_space_after_type_colon = true +ij_typescript_space_after_unary_not = false +ij_typescript_space_before_async_arrow_lparen = true +ij_typescript_space_before_catch_keyword = true +ij_typescript_space_before_catch_left_brace = true +ij_typescript_space_before_catch_parentheses = true +ij_typescript_space_before_class_lbrace = true +ij_typescript_space_before_class_left_brace = true +ij_typescript_space_before_colon = true +ij_typescript_space_before_comma = false +ij_typescript_space_before_do_left_brace = true +ij_typescript_space_before_else_keyword = true +ij_typescript_space_before_else_left_brace = true +ij_typescript_space_before_finally_keyword = true +ij_typescript_space_before_finally_left_brace = true +ij_typescript_space_before_for_left_brace = true +ij_typescript_space_before_for_parentheses = true +ij_typescript_space_before_for_semicolon = false +ij_typescript_space_before_function_left_parenth = true +ij_typescript_space_before_generator_mult = false +ij_typescript_space_before_if_left_brace = true +ij_typescript_space_before_if_parentheses = true +ij_typescript_space_before_method_call_parentheses = false +ij_typescript_space_before_method_left_brace = true +ij_typescript_space_before_method_parentheses = false +ij_typescript_space_before_property_colon = false +ij_typescript_space_before_quest = true +ij_typescript_space_before_switch_left_brace = true +ij_typescript_space_before_switch_parentheses = true +ij_typescript_space_before_try_left_brace = true +ij_typescript_space_before_type_colon = false +ij_typescript_space_before_unary_not = false +ij_typescript_space_before_while_keyword = true +ij_typescript_space_before_while_left_brace = true +ij_typescript_space_before_while_parentheses = true 
+ij_typescript_spaces_around_additive_operators = true +ij_typescript_spaces_around_arrow_function_operator = true +ij_typescript_spaces_around_assignment_operators = true +ij_typescript_spaces_around_bitwise_operators = true +ij_typescript_spaces_around_equality_operators = true +ij_typescript_spaces_around_logical_operators = true +ij_typescript_spaces_around_multiplicative_operators = true +ij_typescript_spaces_around_relational_operators = true +ij_typescript_spaces_around_shift_operators = true +ij_typescript_spaces_around_unary_operator = false +ij_typescript_spaces_within_array_initializer_brackets = false +ij_typescript_spaces_within_brackets = false +ij_typescript_spaces_within_catch_parentheses = false +ij_typescript_spaces_within_for_parentheses = false +ij_typescript_spaces_within_if_parentheses = false +ij_typescript_spaces_within_imports = false +ij_typescript_spaces_within_interpolation_expressions = false +ij_typescript_spaces_within_method_call_parentheses = false +ij_typescript_spaces_within_method_parentheses = false +ij_typescript_spaces_within_object_literal_braces = false +ij_typescript_spaces_within_object_type_braces = true +ij_typescript_spaces_within_parentheses = false +ij_typescript_spaces_within_switch_parentheses = false +ij_typescript_spaces_within_type_assertion = false +ij_typescript_spaces_within_union_types = true +ij_typescript_spaces_within_while_parentheses = false +ij_typescript_special_else_if_treatment = true +ij_typescript_ternary_operation_signs_on_next_line = false +ij_typescript_ternary_operation_wrap = off +ij_typescript_union_types_wrap = on_every_item +ij_typescript_use_chained_calls_group_indents = false +ij_typescript_use_double_quotes = true +ij_typescript_use_explicit_js_extension = global +ij_typescript_use_path_mapping = always +ij_typescript_use_public_modifier = false +ij_typescript_use_semicolon_after_statement = true +ij_typescript_var_declaration_wrap = normal +ij_typescript_while_brace_force = never 
+ij_typescript_while_on_new_line = false +ij_typescript_wrap_comments = false + +[{*.bash,*.zsh,*.sh}] +indent_size = 2 +tab_width = 2 +ij_shell_binary_ops_start_line = false +ij_shell_keep_column_alignment_padding = false +ij_shell_minify_program = false +ij_shell_redirect_followed_by_space = false +ij_shell_switch_cases_indented = false + +[{*.cjsx,*.coffee}] +indent_size = 2 +tab_width = 2 +ij_continuation_indent_size = 2 +ij_coffeescript_align_function_body = false +ij_coffeescript_align_imports = false +ij_coffeescript_align_multiline_array_initializer_expression = true +ij_coffeescript_align_multiline_parameters = true +ij_coffeescript_align_multiline_parameters_in_calls = false +ij_coffeescript_align_object_properties = 0 +ij_coffeescript_align_union_types = false +ij_coffeescript_align_var_statements = 0 +ij_coffeescript_array_initializer_new_line_after_left_brace = false +ij_coffeescript_array_initializer_right_brace_on_new_line = false +ij_coffeescript_array_initializer_wrap = normal +ij_coffeescript_blacklist_imports = rxjs/Rx,node_modules/**/*,@angular/material,@angular/material/typings/** +ij_coffeescript_blank_lines_around_function = 1 +ij_coffeescript_call_parameters_new_line_after_left_paren = false +ij_coffeescript_call_parameters_right_paren_on_new_line = false +ij_coffeescript_call_parameters_wrap = normal +ij_coffeescript_chained_call_dot_on_new_line = true +ij_coffeescript_comma_on_new_line = false +ij_coffeescript_enforce_trailing_comma = keep +ij_coffeescript_field_prefix = _ +ij_coffeescript_file_name_style = relaxed +ij_coffeescript_force_quote_style = false +ij_coffeescript_force_semicolon_style = false +ij_coffeescript_function_expression_brace_style = end_of_line +ij_coffeescript_import_merge_members = global +ij_coffeescript_import_prefer_absolute_path = global +ij_coffeescript_import_sort_members = true +ij_coffeescript_import_sort_module_name = false +ij_coffeescript_import_use_node_resolution = true +ij_coffeescript_imports_wrap = 
on_every_item +ij_coffeescript_indent_chained_calls = true +ij_coffeescript_indent_package_children = 0 +ij_coffeescript_jsx_attribute_value = braces +ij_coffeescript_keep_blank_lines_in_code = 2 +ij_coffeescript_keep_first_column_comment = true +ij_coffeescript_keep_indents_on_empty_lines = false +ij_coffeescript_keep_line_breaks = true +ij_coffeescript_keep_simple_methods_in_one_line = false +ij_coffeescript_method_parameters_new_line_after_left_paren = false +ij_coffeescript_method_parameters_right_paren_on_new_line = false +ij_coffeescript_method_parameters_wrap = off +ij_coffeescript_object_literal_wrap = on_every_item +ij_coffeescript_prefer_as_type_cast = false +ij_coffeescript_reformat_c_style_comments = false +ij_coffeescript_space_after_comma = true +ij_coffeescript_space_after_dots_in_rest_parameter = false +ij_coffeescript_space_after_generator_mult = true +ij_coffeescript_space_after_property_colon = true +ij_coffeescript_space_after_type_colon = true +ij_coffeescript_space_after_unary_not = false +ij_coffeescript_space_before_async_arrow_lparen = true +ij_coffeescript_space_before_class_lbrace = true +ij_coffeescript_space_before_comma = false +ij_coffeescript_space_before_function_left_parenth = true +ij_coffeescript_space_before_generator_mult = false +ij_coffeescript_space_before_property_colon = false +ij_coffeescript_space_before_type_colon = false +ij_coffeescript_space_before_unary_not = false +ij_coffeescript_spaces_around_additive_operators = true +ij_coffeescript_spaces_around_arrow_function_operator = true +ij_coffeescript_spaces_around_assignment_operators = true +ij_coffeescript_spaces_around_bitwise_operators = true +ij_coffeescript_spaces_around_equality_operators = true +ij_coffeescript_spaces_around_logical_operators = true +ij_coffeescript_spaces_around_multiplicative_operators = true +ij_coffeescript_spaces_around_relational_operators = true +ij_coffeescript_spaces_around_shift_operators = true 
+ij_coffeescript_spaces_around_unary_operator = false +ij_coffeescript_spaces_within_array_initializer_braces = false +ij_coffeescript_spaces_within_array_initializer_brackets = false +ij_coffeescript_spaces_within_imports = false +ij_coffeescript_spaces_within_index_brackets = false +ij_coffeescript_spaces_within_interpolation_expressions = false +ij_coffeescript_spaces_within_method_call_parentheses = false +ij_coffeescript_spaces_within_method_parentheses = false +ij_coffeescript_spaces_within_object_braces = false +ij_coffeescript_spaces_within_object_literal_braces = false +ij_coffeescript_spaces_within_object_type_braces = true +ij_coffeescript_spaces_within_range_brackets = false +ij_coffeescript_spaces_within_type_assertion = false +ij_coffeescript_spaces_within_union_types = true +ij_coffeescript_union_types_wrap = on_every_item +ij_coffeescript_use_chained_calls_group_indents = false +ij_coffeescript_use_double_quotes = true +ij_coffeescript_use_explicit_js_extension = global +ij_coffeescript_use_path_mapping = always +ij_coffeescript_use_public_modifier = false +ij_coffeescript_use_semicolon_after_statement = false +ij_coffeescript_var_declaration_wrap = normal + +[{*.ctp,*.hphp,*.phtml,*.php_cs,*.module,*.php,*.php5,*.php4,*.inc}] +ij_continuation_indent_size = 4 +ij_php_align_assignments = false +ij_php_align_class_constants = false +ij_php_align_group_field_declarations = false +ij_php_align_inline_comments = false +ij_php_align_key_value_pairs = false +ij_php_align_multiline_array_initializer_expression = false +ij_php_align_multiline_binary_operation = false +ij_php_align_multiline_chained_methods = false +ij_php_align_multiline_extends_list = false +ij_php_align_multiline_for = true +ij_php_align_multiline_parameters = true +ij_php_align_multiline_parameters_in_calls = false +ij_php_align_multiline_ternary_operation = false +ij_php_align_phpdoc_comments = true +ij_php_align_phpdoc_param_names = true +ij_php_anonymous_brace_style = end_of_line 
+ij_php_api_weight = 28 +ij_php_array_initializer_new_line_after_left_brace = false +ij_php_array_initializer_right_brace_on_new_line = false +ij_php_array_initializer_wrap = off +ij_php_assignment_wrap = off +ij_php_author_weight = 28 +ij_php_binary_operation_sign_on_next_line = false +ij_php_binary_operation_wrap = off +ij_php_blank_lines_after_class_header = 0 +ij_php_blank_lines_after_function = 1 +ij_php_blank_lines_after_imports = 1 +ij_php_blank_lines_after_opening_tag = 0 +ij_php_blank_lines_after_package = 0 +ij_php_blank_lines_around_class = 1 +ij_php_blank_lines_around_constants = 0 +ij_php_blank_lines_around_field = 0 +ij_php_blank_lines_around_method = 1 +ij_php_blank_lines_before_class_end = 0 +ij_php_blank_lines_before_imports = 1 +ij_php_blank_lines_before_method_body = 0 +ij_php_blank_lines_before_package = 1 +ij_php_blank_lines_before_return_statement = 0 +ij_php_blank_lines_between_imports = 0 +ij_php_block_brace_style = end_of_line +ij_php_call_parameters_new_line_after_left_paren = false +ij_php_call_parameters_right_paren_on_new_line = false +ij_php_call_parameters_wrap = off +ij_php_catch_on_new_line = false +ij_php_category_weight = 28 +ij_php_class_brace_style = next_line +ij_php_comma_after_last_array_element = true +ij_php_concat_spaces = true +ij_php_copyright_weight = 28 +ij_php_deprecated_weight = 28 +ij_php_do_while_brace_force = never +ij_php_else_if_style = as_is +ij_php_else_on_new_line = false +ij_php_example_weight = 28 +ij_php_extends_keyword_wrap = off +ij_php_extends_list_wrap = off +ij_php_fields_default_visibility = private +ij_php_filesource_weight = 28 +ij_php_finally_on_new_line = false +ij_php_for_brace_force = never +ij_php_for_statement_new_line_after_left_paren = false +ij_php_for_statement_right_paren_on_new_line = false +ij_php_for_statement_wrap = off +ij_php_force_short_declaration_array_style = true +ij_php_global_weight = 28 +ij_php_group_use_wrap = on_every_item +ij_php_if_brace_force = never 
+ij_php_if_lparen_on_next_line = false +ij_php_if_rparen_on_next_line = false +ij_php_ignore_weight = 28 +ij_php_import_sorting = alphabetic +ij_php_indent_break_from_case = true +ij_php_indent_case_from_switch = true +ij_php_indent_code_in_php_tags = false +ij_php_internal_weight = 28 +ij_php_keep_blank_lines_after_lbrace = 2 +ij_php_keep_blank_lines_before_right_brace = 2 +ij_php_keep_blank_lines_in_code = 2 +ij_php_keep_blank_lines_in_declarations = 2 +ij_php_keep_control_statement_in_one_line = true +ij_php_keep_first_column_comment = true +ij_php_keep_indents_on_empty_lines = false +ij_php_keep_line_breaks = true +ij_php_keep_rparen_and_lbrace_on_one_line = false +ij_php_keep_simple_methods_in_one_line = false +ij_php_lambda_brace_style = end_of_line +ij_php_license_weight = 28 +ij_php_line_comment_add_space = false +ij_php_line_comment_at_first_column = true +ij_php_link_weight = 28 +ij_php_lower_case_boolean_const = true +ij_php_lower_case_null_const = true +ij_php_method_brace_style = next_line +ij_php_method_call_chain_wrap = off +ij_php_method_parameters_new_line_after_left_paren = false +ij_php_method_parameters_right_paren_on_new_line = false +ij_php_method_parameters_wrap = off +ij_php_method_weight = 28 +ij_php_modifier_list_wrap = false +ij_php_multiline_chained_calls_semicolon_on_new_line = false +ij_php_namespace_brace_style = 1 +ij_php_null_type_position = in_the_beginning +ij_php_package_weight = 28 +ij_php_param_weight = 0 +ij_php_parentheses_expression_new_line_after_left_paren = false +ij_php_parentheses_expression_right_paren_on_new_line = false +ij_php_phpdoc_blank_line_before_tags = false +ij_php_phpdoc_blank_lines_around_parameters = true +ij_php_phpdoc_keep_blank_lines = true +ij_php_phpdoc_param_spaces_between_name_and_description = 1 +ij_php_phpdoc_param_spaces_between_tag_and_type = 1 +ij_php_phpdoc_param_spaces_between_type_and_name = 1 +ij_php_phpdoc_use_fqcn = true +ij_php_phpdoc_wrap_long_lines = true 
+ij_php_place_assignment_sign_on_next_line = false +ij_php_place_parens_for_constructor = 0 +ij_php_property_read_weight = 28 +ij_php_property_weight = 28 +ij_php_property_write_weight = 28 +ij_php_return_type_on_new_line = false +ij_php_return_weight = 1 +ij_php_see_weight = 28 +ij_php_since_weight = 28 +ij_php_sort_phpdoc_elements = true +ij_php_space_after_colon = true +ij_php_space_after_colon_in_return_type = true +ij_php_space_after_comma = true +ij_php_space_after_for_semicolon = true +ij_php_space_after_quest = true +ij_php_space_after_type_cast = false +ij_php_space_after_unary_not = false +ij_php_space_before_array_initializer_left_brace = false +ij_php_space_before_catch_keyword = true +ij_php_space_before_catch_left_brace = true +ij_php_space_before_catch_parentheses = true +ij_php_space_before_class_left_brace = true +ij_php_space_before_closure_left_parenthesis = true +ij_php_space_before_colon = true +ij_php_space_before_colon_in_return_type = false +ij_php_space_before_comma = false +ij_php_space_before_do_left_brace = true +ij_php_space_before_else_keyword = true +ij_php_space_before_else_left_brace = true +ij_php_space_before_finally_keyword = true +ij_php_space_before_finally_left_brace = true +ij_php_space_before_for_left_brace = true +ij_php_space_before_for_parentheses = true +ij_php_space_before_for_semicolon = false +ij_php_space_before_if_left_brace = true +ij_php_space_before_if_parentheses = true +ij_php_space_before_method_call_parentheses = false +ij_php_space_before_method_left_brace = true +ij_php_space_before_method_parentheses = false +ij_php_space_before_quest = true +ij_php_space_before_switch_left_brace = true +ij_php_space_before_switch_parentheses = true +ij_php_space_before_try_left_brace = true +ij_php_space_before_unary_not = false +ij_php_space_before_while_keyword = true +ij_php_space_before_while_left_brace = true +ij_php_space_before_while_parentheses = true +ij_php_space_between_ternary_quest_and_colon = false 
+ij_php_spaces_around_additive_operators = true +ij_php_spaces_around_arrow = false +ij_php_spaces_around_assignment_in_declare = false +ij_php_spaces_around_assignment_operators = true +ij_php_spaces_around_bitwise_operators = true +ij_php_spaces_around_equality_operators = true +ij_php_spaces_around_logical_operators = true +ij_php_spaces_around_multiplicative_operators = true +ij_php_spaces_around_null_coalesce_operator = true +ij_php_spaces_around_relational_operators = true +ij_php_spaces_around_shift_operators = true +ij_php_spaces_around_unary_operator = false +ij_php_spaces_around_var_within_brackets = false +ij_php_spaces_within_array_initializer_braces = false +ij_php_spaces_within_brackets = false +ij_php_spaces_within_catch_parentheses = false +ij_php_spaces_within_for_parentheses = false +ij_php_spaces_within_if_parentheses = false +ij_php_spaces_within_method_call_parentheses = false +ij_php_spaces_within_method_parentheses = false +ij_php_spaces_within_parentheses = false +ij_php_spaces_within_short_echo_tags = true +ij_php_spaces_within_switch_parentheses = false +ij_php_spaces_within_while_parentheses = false +ij_php_special_else_if_treatment = false +ij_php_subpackage_weight = 28 +ij_php_ternary_operation_signs_on_next_line = false +ij_php_ternary_operation_wrap = off +ij_php_throws_weight = 2 +ij_php_todo_weight = 28 +ij_php_unknown_tag_weight = 28 +ij_php_upper_case_boolean_const = false +ij_php_upper_case_null_const = false +ij_php_uses_weight = 28 +ij_php_var_weight = 28 +ij_php_variable_naming_style = mixed +ij_php_version_weight = 28 +ij_php_while_brace_force = never +ij_php_while_on_new_line = false + +[{*.js,*.cjs}] +ij_continuation_indent_size = 4 +ij_javascript_align_imports = false +ij_javascript_align_multiline_array_initializer_expression = false +ij_javascript_align_multiline_binary_operation = false +ij_javascript_align_multiline_chained_methods = false +ij_javascript_align_multiline_extends_list = false 
+ij_javascript_align_multiline_for = true +ij_javascript_align_multiline_parameters = true +ij_javascript_align_multiline_parameters_in_calls = false +ij_javascript_align_multiline_ternary_operation = false +ij_javascript_align_object_properties = 0 +ij_javascript_align_union_types = false +ij_javascript_align_var_statements = 0 +ij_javascript_array_initializer_new_line_after_left_brace = false +ij_javascript_array_initializer_right_brace_on_new_line = false +ij_javascript_array_initializer_wrap = off +ij_javascript_assignment_wrap = off +ij_javascript_binary_operation_sign_on_next_line = false +ij_javascript_binary_operation_wrap = off +ij_javascript_blacklist_imports = rxjs/Rx,node_modules/**/*,@angular/material,@angular/material/typings/** +ij_javascript_blank_lines_after_imports = 1 +ij_javascript_blank_lines_around_class = 1 +ij_javascript_blank_lines_around_field = 0 +ij_javascript_blank_lines_around_function = 1 +ij_javascript_blank_lines_around_method = 1 +ij_javascript_block_brace_style = end_of_line +ij_javascript_call_parameters_new_line_after_left_paren = false +ij_javascript_call_parameters_right_paren_on_new_line = false +ij_javascript_call_parameters_wrap = off +ij_javascript_catch_on_new_line = false +ij_javascript_chained_call_dot_on_new_line = true +ij_javascript_class_brace_style = end_of_line +ij_javascript_comma_on_new_line = false +ij_javascript_do_while_brace_force = never +ij_javascript_else_on_new_line = false +ij_javascript_enforce_trailing_comma = keep +ij_javascript_extends_keyword_wrap = off +ij_javascript_extends_list_wrap = off +ij_javascript_field_prefix = _ +ij_javascript_file_name_style = relaxed +ij_javascript_finally_on_new_line = false +ij_javascript_for_brace_force = never +ij_javascript_for_statement_new_line_after_left_paren = false +ij_javascript_for_statement_right_paren_on_new_line = false +ij_javascript_for_statement_wrap = off +ij_javascript_force_quote_style = false +ij_javascript_force_semicolon_style = false 
+ij_javascript_function_expression_brace_style = end_of_line +ij_javascript_if_brace_force = never +ij_javascript_import_merge_members = global +ij_javascript_import_prefer_absolute_path = global +ij_javascript_import_sort_members = true +ij_javascript_import_sort_module_name = false +ij_javascript_import_use_node_resolution = true +ij_javascript_imports_wrap = on_every_item +ij_javascript_indent_case_from_switch = true +ij_javascript_indent_chained_calls = true +ij_javascript_indent_package_children = 0 +ij_javascript_jsx_attribute_value = braces +ij_javascript_keep_blank_lines_in_code = 2 +ij_javascript_keep_first_column_comment = true +ij_javascript_keep_indents_on_empty_lines = false +ij_javascript_keep_line_breaks = true +ij_javascript_keep_simple_blocks_in_one_line = false +ij_javascript_keep_simple_methods_in_one_line = false +ij_javascript_line_comment_add_space = true +ij_javascript_line_comment_at_first_column = false +ij_javascript_method_brace_style = end_of_line +ij_javascript_method_call_chain_wrap = off +ij_javascript_method_parameters_new_line_after_left_paren = false +ij_javascript_method_parameters_right_paren_on_new_line = false +ij_javascript_method_parameters_wrap = off +ij_javascript_object_literal_wrap = on_every_item +ij_javascript_parentheses_expression_new_line_after_left_paren = false +ij_javascript_parentheses_expression_right_paren_on_new_line = false +ij_javascript_place_assignment_sign_on_next_line = false +ij_javascript_prefer_as_type_cast = false +ij_javascript_prefer_parameters_wrap = false +ij_javascript_reformat_c_style_comments = false +ij_javascript_space_after_colon = true +ij_javascript_space_after_comma = true +ij_javascript_space_after_dots_in_rest_parameter = false +ij_javascript_space_after_generator_mult = true +ij_javascript_space_after_property_colon = true +ij_javascript_space_after_quest = true +ij_javascript_space_after_type_colon = true +ij_javascript_space_after_unary_not = false 
+ij_javascript_space_before_async_arrow_lparen = true +ij_javascript_space_before_catch_keyword = true +ij_javascript_space_before_catch_left_brace = true +ij_javascript_space_before_catch_parentheses = true +ij_javascript_space_before_class_lbrace = true +ij_javascript_space_before_class_left_brace = true +ij_javascript_space_before_colon = true +ij_javascript_space_before_comma = false +ij_javascript_space_before_do_left_brace = true +ij_javascript_space_before_else_keyword = true +ij_javascript_space_before_else_left_brace = true +ij_javascript_space_before_finally_keyword = true +ij_javascript_space_before_finally_left_brace = true +ij_javascript_space_before_for_left_brace = true +ij_javascript_space_before_for_parentheses = true +ij_javascript_space_before_for_semicolon = false +ij_javascript_space_before_function_left_parenth = true +ij_javascript_space_before_generator_mult = false +ij_javascript_space_before_if_left_brace = true +ij_javascript_space_before_if_parentheses = true +ij_javascript_space_before_method_call_parentheses = false +ij_javascript_space_before_method_left_brace = true +ij_javascript_space_before_method_parentheses = false +ij_javascript_space_before_property_colon = false +ij_javascript_space_before_quest = true +ij_javascript_space_before_switch_left_brace = true +ij_javascript_space_before_switch_parentheses = true +ij_javascript_space_before_try_left_brace = true +ij_javascript_space_before_type_colon = false +ij_javascript_space_before_unary_not = false +ij_javascript_space_before_while_keyword = true +ij_javascript_space_before_while_left_brace = true +ij_javascript_space_before_while_parentheses = true +ij_javascript_spaces_around_additive_operators = true +ij_javascript_spaces_around_arrow_function_operator = true +ij_javascript_spaces_around_assignment_operators = true +ij_javascript_spaces_around_bitwise_operators = true +ij_javascript_spaces_around_equality_operators = true +ij_javascript_spaces_around_logical_operators = 
true +ij_javascript_spaces_around_multiplicative_operators = true +ij_javascript_spaces_around_relational_operators = true +ij_javascript_spaces_around_shift_operators = true +ij_javascript_spaces_around_unary_operator = false +ij_javascript_spaces_within_array_initializer_brackets = false +ij_javascript_spaces_within_brackets = false +ij_javascript_spaces_within_catch_parentheses = false +ij_javascript_spaces_within_for_parentheses = false +ij_javascript_spaces_within_if_parentheses = false +ij_javascript_spaces_within_imports = false +ij_javascript_spaces_within_interpolation_expressions = false +ij_javascript_spaces_within_method_call_parentheses = false +ij_javascript_spaces_within_method_parentheses = false +ij_javascript_spaces_within_object_literal_braces = false +ij_javascript_spaces_within_object_type_braces = true +ij_javascript_spaces_within_parentheses = false +ij_javascript_spaces_within_switch_parentheses = false +ij_javascript_spaces_within_type_assertion = false +ij_javascript_spaces_within_union_types = true +ij_javascript_spaces_within_while_parentheses = false +ij_javascript_special_else_if_treatment = true +ij_javascript_ternary_operation_signs_on_next_line = false +ij_javascript_ternary_operation_wrap = off +ij_javascript_union_types_wrap = on_every_item +ij_javascript_use_chained_calls_group_indents = false +ij_javascript_use_double_quotes = true +ij_javascript_use_explicit_js_extension = global +ij_javascript_use_path_mapping = always +ij_javascript_use_public_modifier = false +ij_javascript_use_semicolon_after_statement = true +ij_javascript_var_declaration_wrap = normal +ij_javascript_while_brace_force = never +ij_javascript_while_on_new_line = false +ij_javascript_wrap_comments = false + +[{*.ng,*.sht,*.html,*.shtm,*.htm,*.shtml}] +ij_html_add_new_line_before_tags = body,div,p,form,h1,h2,h3 +ij_html_align_attributes = true +ij_html_align_text = false +ij_html_attribute_wrap = normal +ij_html_block_comment_at_first_column = true 
+ij_html_do_not_align_children_of_min_lines = 0 +ij_html_do_not_break_if_inline_tags = title,h1,h2,h3,h4,h5,h6,p +ij_html_do_not_indent_children_of_tags = html,body,thead,tbody,tfoot +ij_html_enforce_quotes = false +ij_html_inline_tags = a,abbr,acronym,b,basefont,bdo,big,br,cite,cite,code,dfn,em,font,i,img,input,kbd,label,q,s,samp,select,small,span,strike,strong,sub,sup,textarea,tt,u,var +ij_html_keep_blank_lines = 2 +ij_html_keep_indents_on_empty_lines = false +ij_html_keep_line_breaks = true +ij_html_keep_line_breaks_in_text = true +ij_html_keep_whitespaces = false +ij_html_keep_whitespaces_inside = span,pre,textarea +ij_html_line_comment_at_first_column = true +ij_html_new_line_after_last_attribute = never +ij_html_new_line_before_first_attribute = never +ij_html_quote_style = double +ij_html_remove_new_line_before_tags = br +ij_html_space_after_tag_name = false +ij_html_space_around_equality_in_attribute = false +ij_html_space_inside_empty_tag = false +ij_html_text_wrap = normal + +[{*.yml,*.yaml}] +indent_size = 2 +ij_yaml_keep_indents_on_empty_lines = false +ij_yaml_keep_line_breaks = true + +[{.eslintrc,composer.lock,.babelrc,.stylelintrc,jest.config,bowerrc,*.json,*.jsb3,*.jsb2}] +indent_size = 2 +ij_json_keep_blank_lines_in_code = 0 +ij_json_keep_indents_on_empty_lines = false +ij_json_keep_line_breaks = true +ij_json_space_after_colon = true +ij_json_space_after_comma = true +ij_json_space_before_colon = true +ij_json_space_before_comma = false +ij_json_spaces_within_braces = false +ij_json_spaces_within_brackets = false +ij_json_wrap_long_lines = false + +[{phpunit.xml.dist,*.xslt,*.xul,*.rng,*.xsl,*.xsd,*.ant,*.jhm,*.tld,*.fxml,*.wsdl,*.jrxml,*.xml,*.jnlp,*.s3db}] +ij_xml_block_comment_at_first_column = true +ij_xml_keep_indents_on_empty_lines = false +ij_xml_line_comment_at_first_column = true diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7da0cfd --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +/.editorconfig 
export-ignore +/.gitignore export-ignore +/.gitattributes export-ignore +/phpcs.php_cs export-ignore +/phpstan.neon export-ignore +/tests export-ignore diff --git a/.travis.yml b/.travis.yml index dee779b..5470dc5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,8 @@ php: before_script: - composer install --no-interaction --prefer-source + - if [ "$(phpenv version-name)" == 7.3 ]; then travis_retry composer require phpstan/phpstan-shim; fi script: - vendor/bin/phpunit --coverage-text --verbose + - if [ "$(phpenv version-name)" == 7.3 ]; then php vendor/bin/phpstan analyse; fi diff --git a/URLify.php b/URLify.php index 90835e8..e1358cf 100644 --- a/URLify.php +++ b/URLify.php @@ -73,6 +73,14 @@ public static function add_chars(array $map, string $language = null) } } + /** + * @return void + */ + public static function reset_chars() + { + self::$maps = []; + } + /** * Transliterates characters to their ASCII equivalents. * $language specifies a priority for a specific language. @@ -89,7 +97,6 @@ public static function downcode( string $language = 'en', string $unknown = '' ): string { - $string = self::expandString($string, $language); foreach (self::$maps as $mapsInner) { diff --git a/phpcs.php_cs b/phpcs.php_cs new file mode 100644 index 0000000..640d7ac --- /dev/null +++ b/phpcs.php_cs @@ -0,0 +1,238 @@ +setUsingCache(false) + ->setRiskyAllowed(true) + ->setRules( + [ + 'align_multiline_comment' => [ + 'comment_type' => 'all_multiline', + ], + 'array_indentation' => true, + 'array_syntax' => [ + 'syntax' => 'short', + ], + 'backtick_to_shell_exec' => true, + 'binary_operator_spaces' => [ + 'operators' => ['=>' => 'align_single_space_minimal'], + ], + 'blank_line_after_namespace' => true, + 'blank_line_after_opening_tag' => false, + 'blank_line_before_statement' => true, + 'braces' => true, + 'cast_spaces' => [ + 'space' => 'single', + ], + 'class_attributes_separation' => true, + 'class_keyword_remove' => false, + 'combine_consecutive_issets' => true, + 
'combine_consecutive_unsets' => true, + 'combine_nested_dirname' => true, + // 'compact_nullable_typehint' => true, // PHP >= 7.1 + 'concat_space' => [ + 'spacing' => 'one', + ], + 'date_time_immutable' => false, + 'declare_equal_normalize' => true, + // 'declare_strict_types' => true, + 'dir_constant' => true, + 'elseif' => true, + 'encoding' => true, + 'ereg_to_preg' => true, + 'error_suppression' => false, + 'escape_implicit_backslashes' => false, + 'explicit_indirect_variable' => true, + 'explicit_string_variable' => true, + 'final_internal_class' => true, + 'fopen_flag_order' => true, + 'fopen_flags' => true, + 'full_opening_tag' => true, + 'fully_qualified_strict_types' => false, // maybe better for readability, so keep it ... + 'function_declaration' => true, + 'function_to_constant' => true, + 'function_typehint_space' => true, + 'general_phpdoc_annotation_remove' => [ + 'annotations' => [ + 'author', + 'package', + 'version', + ], + ], + 'heredoc_to_nowdoc' => false, + 'implode_call' => false, + 'include' => true, + 'increment_style' => false, // maybe better for readability, so keep it ... + 'indentation_type' => true, + 'line_ending' => true, + 'linebreak_after_opening_tag' => false, + /* // Requires PHP >= 7.1 + 'list_syntax' => [ + 'syntax' => 'short', + ], + */ + 'logical_operators' => true, + 'lowercase_cast' => true, + 'lowercase_constants' => true, + 'lowercase_keywords' => true, + 'lowercase_static_reference' => true, + 'magic_constant_casing' => true, + 'magic_method_casing' => true, + 'method_argument_space' => [ + 'ensure_fully_multiline' => true, + 'keep_multiple_spaces_after_comma' => false, + ], + 'method_chaining_indentation' => false, // maybe better for readability, so keep it ... + 'modernize_types_casting' => true, + 'multiline_comment_opening_closing' => false, // maybe better for readability, so keep it ... 
+ 'multiline_whitespace_before_semicolons' => [ + 'strategy' => 'no_multi_line', + ], + 'native_constant_invocation' => true, + 'native_function_casing' => true, + 'native_function_invocation' => true, + 'new_with_braces' => true, + 'no_alias_functions' => true, + 'no_alternative_syntax' => true, + 'no_binary_string' => true, + 'no_blank_lines_after_class_opening' => false, + 'no_blank_lines_after_phpdoc' => true, + 'no_blank_lines_before_namespace' => false, + 'no_break_comment' => true, + 'no_closing_tag' => true, + 'no_empty_comment' => true, + 'no_empty_phpdoc' => true, + 'no_empty_statement' => true, + 'no_extra_blank_lines' => true, + 'no_homoglyph_names' => true, + 'no_leading_import_slash' => true, + 'no_leading_namespace_whitespace' => true, + 'no_mixed_echo_print' => [ + 'use' => 'echo', + ], + 'no_multiline_whitespace_around_double_arrow' => true, + 'no_null_property_initialization' => true, + 'no_php4_constructor' => true, + 'no_short_bool_cast' => true, + 'no_short_echo_tag' => true, + 'no_singleline_whitespace_before_semicolons' => true, + 'no_spaces_after_function_name' => true, + 'no_spaces_around_offset' => true, + 'no_spaces_inside_parenthesis' => true, + 'no_superfluous_elseif' => false, // maybe better for readability, so keep it ... + 'no_superfluous_phpdoc_tags' => false, // maybe add extra description, so keep it ... + 'no_trailing_comma_in_list_call' => true, + 'no_trailing_comma_in_singleline_array' => true, + 'no_trailing_whitespace' => true, + 'no_trailing_whitespace_in_comment' => true, + 'no_unneeded_control_parentheses' => true, + 'no_unneeded_curly_braces' => true, + 'no_unneeded_final_method' => true, + 'no_unreachable_default_argument_value' => false, // do not changes the logic of the code ... 
+ 'no_unset_on_property' => true, + 'no_unused_imports' => true, + 'no_useless_else' => true, + 'no_useless_return' => true, + 'no_whitespace_before_comma_in_array' => true, + 'no_whitespace_in_blank_line' => true, + 'non_printable_character' => true, + 'normalize_index_brace' => true, + 'not_operator_with_space' => false, + 'not_operator_with_successor_space' => false, + 'object_operator_without_whitespace' => true, + 'ordered_class_elements' => false, // maybe better for readability, so keep it ... + 'ordered_imports' => true, + 'phpdoc_add_missing_param_annotation' => [ + 'only_untyped' => true, + ], + 'phpdoc_align' => false, // maybe better for readability for very long names, so keep it ... + 'phpdoc_annotation_without_dot' => true, + 'phpdoc_indent' => true, + 'phpdoc_inline_tag' => true, + 'phpdoc_no_access' => true, + 'phpdoc_no_alias_tag' => true, + 'phpdoc_no_empty_return' => false, // maybe better for readability, so keep it ... + 'phpdoc_no_package' => true, + 'phpdoc_no_useless_inheritdoc' => true, + 'phpdoc_order' => true, + 'phpdoc_return_self_reference' => true, + 'phpdoc_scalar' => true, + 'phpdoc_separation' => true, + 'phpdoc_single_line_var_spacing' => true, + 'phpdoc_summary' => false, + 'phpdoc_to_comment' => false, + 'phpdoc_to_return_type' => false, + 'phpdoc_trim' => true, + 'phpdoc_trim_consecutive_blank_line_separation' => true, + 'phpdoc_types' => true, + 'phpdoc_types_order' => [ + 'null_adjustment' => 'always_last', + 'sort_algorithm' => 'alpha', + ], + 'phpdoc_var_without_name' => true, + 'php_unit_construct' => true, + 'php_unit_dedicate_assert' => true, + 'php_unit_expectation' => true, + 'php_unit_fqcn_annotation' => true, + 'php_unit_internal_class' => true, + 'php_unit_method_casing' => true, + 'php_unit_mock' => true, + 'php_unit_namespaced' => true, + 'php_unit_no_expectation_annotation' => true, + 'php_unit_ordered_covers' => true, + 'php_unit_set_up_tear_down_visibility' => true, + 'php_unit_strict' => true, + 
'php_unit_test_annotation' => true, + 'php_unit_test_case_static_method_calls' => true, + 'php_unit_test_class_requires_covers' => false, + 'pow_to_exponentiation' => true, + 'pre_increment' => false, + 'protected_to_private' => true, + 'return_assignment' => true, + 'return_type_declaration' => true, + 'self_accessor' => true, + 'semicolon_after_instruction' => true, + 'set_type_to_cast' => true, + 'short_scalar_cast' => true, + 'silenced_deprecation_error' => false, + 'simplified_null_return' => false, // maybe better for readability, so keep it ... + 'single_blank_line_at_eof' => true, + 'single_class_element_per_statement' => true, + 'single_import_per_statement' => true, + 'single_line_after_imports' => true, + 'single_line_comment_style' => [ + 'comment_types' => ['hash'], + ], + 'single_quote' => true, + 'space_after_semicolon' => true, + 'standardize_increment' => false, // maybe better for readability, so keep it ... + 'standardize_not_equals' => true, + 'static_lambda' => true, + 'strict_comparison' => true, + 'strict_param' => true, + 'string_line_ending' => true, + 'switch_case_semicolon_to_colon' => true, + 'switch_case_space' => true, + 'ternary_operator_spaces' => true, + 'ternary_to_null_coalescing' => true, + 'trailing_comma_in_multiline_array' => true, + 'trim_array_spaces' => true, + 'unary_operator_spaces' => true, + 'visibility_required' => true, + // 'void_return' => true, // PHP >= 7.1 + 'whitespace_after_comma_in_array' => true, + 'yoda_style' => [ + 'equal' => false, + 'identical' => false, + 'less_and_greater' => false, + ], + ] + ) + ->setIndent(" ") + ->setLineEnding("\n") + ->setFinder( + PhpCsFixer\Finder::create() + ->in(['.']) + ->name('*.php') + ->ignoreDotFiles(true) + ->ignoreVCS(true) + ); diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..2f44329 --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,8 @@ +parameters: + level: 7 + paths: + - %currentWorkingDirectory%/ + excludes_analyse: + - 
%currentWorkingDirectory%/scripts/ + - %currentWorkingDirectory%/vendor/ + - %currentWorkingDirectory%/tests/ diff --git a/scripts/downcode.php b/scripts/downcode.php index f2a7fda..22da6ae 100644 --- a/scripts/downcode.php +++ b/scripts/downcode.php @@ -1,20 +1,23 @@ 2) { - die ("Usage (argument): php " . basename(__FILE__) . " \"\"\nUsage (pipe): | php " . basename(__FILE__) . "\n"); +require_once \dirname(__DIR__) . '/URLify.php'; + +// Print usage and exit if arguments are invalid +if ($argc < 1 || $argc > 2) { + die('Usage (argument): php ' . \basename(__FILE__) . " \"\"\nUsage (pipe): | php " . \basename(__FILE__) . "\n"); } -//Process the provided argument -if($argc === 2) { - $s = $argv[1]; -//Or read from stdin if the argument wasn't present +// Process the provided argument +if ($argc === 2) { + $s = $argv[1]; +// Or read from stdin if the argument wasn't present } else { - $piped = true; - $s = file_get_contents("php://stdin"); + $piped = true; + $s = \file_get_contents('php://stdin'); } -echo URLify::downcode ($s) . ($piped ? "\n" : ""); +echo URLify::downcode($s) . ($piped ? "\n" : ''); diff --git a/scripts/filter.php b/scripts/filter.php index aec6a0a..034bce3 100644 --- a/scripts/filter.php +++ b/scripts/filter.php @@ -1,20 +1,23 @@ 2) { - die ("Usage (argument): php " . basename(__FILE__) . " \"\"\nUsage (pipe): | php " . basename(__FILE__) . "\n"); +require_once \dirname(__DIR__) . '/URLify.php'; + +// Print usage and exit if arguments are invalid +if ($argc < 1 || $argc > 2) { + die('Usage (argument): php ' . \basename(__FILE__) . " \"\"\nUsage (pipe): | php " . \basename(__FILE__) . 
"\n"); } -//Process the provided argument -if($argc === 2) { - $s = $argv[1]; -//Or read from stdin if the argument wasn't present +// Process the provided argument +if ($argc === 2) { + $s = $argv[1]; +// Or read from stdin if the argument wasn't present } else { - $piped = true; - $s = file_get_contents("php://stdin"); + $piped = true; + $s = \file_get_contents('php://stdin'); } -echo URLify::filter ($s) . ($piped ? "\n" : ""); +echo URLify::filter($s) . ($piped ? "\n" : ''); diff --git a/scripts/transliterate.php b/scripts/transliterate.php index 258581b..000a62c 100644 --- a/scripts/transliterate.php +++ b/scripts/transliterate.php @@ -1,20 +1,23 @@ 2) { - die ("Usage (argument): php " . basename(__FILE__) . " \"\"\nUsage (pipe): | php " . basename(__FILE__) . "\n"); +require_once \dirname(__DIR__) . '/URLify.php'; + +// Print usage and exit if arguments are invalid +if ($argc < 1 || $argc > 2) { + die('Usage (argument): php ' . \basename(__FILE__) . " \"\"\nUsage (pipe): | php " . \basename(__FILE__) . "\n"); } -//Process the provided argument -if($argc === 2) { - $s = $argv[1]; -//Or read from stdin if the argument wasn't present +// Process the provided argument +if ($argc === 2) { + $s = $argv[1]; +// Or read from stdin if the argument wasn't present } else { - $piped = true; - $s = file_get_contents("php://stdin"); + $piped = true; + $s = \file_get_contents('php://stdin'); } -echo URLify::transliterate($s) . ($piped ? "\n" : ""); +echo URLify::transliterate($s) . ($piped ? "\n" : ''); diff --git a/tests/BaseSluggerTest.php b/tests/BaseSluggerTest.php new file mode 100644 index 0000000..6f05aa0 --- /dev/null +++ b/tests/BaseSluggerTest.php @@ -0,0 +1,110 @@ +sluggerClassName; + $this->slugger = new $sluggerClassNamespace(); + + $fixturesBaseDir = __DIR__ . \DIRECTORY_SEPARATOR . 'fixtures' . \DIRECTORY_SEPARATOR . \strtolower($this->sluggerClassName); + $this->inputFixturesDir = $fixturesBaseDir . \DIRECTORY_SEPARATOR . 
'input'; + $this->expectedFixturesDir = $fixturesBaseDir . \DIRECTORY_SEPARATOR . 'expected'; + } + + /** + * @dataProvider provideSlugFileNames + * + * @param $fileName + * + * @noinspection PhpUnitTestsInspection - FP: from parent class + */ + public function testDefaultSlugify($fileName) + { + $inputStrings = \file($this->inputFixturesDir . \DIRECTORY_SEPARATOR . $fileName, \FILE_IGNORE_NEW_LINES); + $expectedSlugs = \file($this->expectedFixturesDir . \DIRECTORY_SEPARATOR . $fileName, \FILE_IGNORE_NEW_LINES); + + $slugger = $this->slugger; + $slugs = \array_map( + static function ($string) use ($slugger) { + /** @noinspection PhpStaticAsDynamicMethodCallInspection */ + return $slugger->filter($string, 200, 'en', false, false, true, '-'); + }, + $inputStrings + ); + + // DEBUG + //\var_export($slugs); + + foreach ($expectedSlugs as $key => $expectedSlugValue) { + static::assertSame($expectedSlugs[$key], $slugs[$key], 'tested-file: ' . $fileName . ' | ' . $slugs[$key]); + } + + static::assertSame($expectedSlugs, $slugs, 'tested-file: ' . $fileName); + } + + /** + * @dataProvider provideSlugEdgeCases + * + * @param $string + * @param $expectedSlug + */ + public function testSlugifyEdgeCases($string, $expectedSlug) + { + $slug = URLify::filter($string, 200, 'de', false, true, true, '-'); + + static::assertSame($expectedSlug, $slug); + } + + /** + * @return array + */ + public function provideSlugEdgeCases(): array + { + return [ + ['', ''], + [' ', ''], + ['-', ''], + ['-A', 'a'], + ['A-', 'a'], + ['-----', ''], + ['-a-A-A-a-', 'a-a-a-a'], + ['A-a-A-a-A-a', 'a-a-a-a-a-a'], + [' -- ', ''], + ['a--A', 'a-a'], + ['a- -A', 'a-a'], + ['a-' . \html_entity_decode(' ') . '-A', 'a-a'], + ['a - ' . \html_entity_decode(' ') . 
' -A', 'a-a'], + [' - - ', ''], + [' -A- ', 'a'], + [' - A - ', 'a'], + ["\0", ''], + [true, '1'], + [false, ''], + [1, '1'], + ]; + } +} diff --git a/tests/SeoSluggerTest.php b/tests/SeoSluggerTest.php new file mode 100644 index 0000000..349a612 --- /dev/null +++ b/tests/SeoSluggerTest.php @@ -0,0 +1,20 @@ + '?', '®' => '(r)', '¼' => '1/4', + '¼' => '1/2', '¾' => '3/4', '¶' => 'P', + ]); + static::assertSame('? (r) 1/2 1/2 3/4 P', URLify::downcode('¿ ® ¼ ¼ ¾ ¶')); + URLify::reset_chars(); + } + + public function testRemoveWords() + { + static::assertSame('foo-bar', URLify::filter('foo bar')); + URLify::remove_words(['foo', 'bar']); + static::assertSame('', URLify::filter('foo bar', 200, 'en', false, true)); + } + + public function testUnknownLanguageCode() + { + static::assertSame('Lo siento, no hablo espanol.', URLify::downcode('Lo siento, no hablo español.', -1)); + } + + public function testRemoveWordsDisable() + { + URLify::remove_words(['foo', 'bar']); + static::assertSame('foo-bar', URLify::filter('foo bar', 60, '', false, false)); + } +} diff --git a/tests/URLifyTest.php b/tests/URLifyTest.php index 4f49fcd..ea918ca 100644 --- a/tests/URLifyTest.php +++ b/tests/URLifyTest.php @@ -1,58 +1,451 @@ assertEquals (' J\'etudie le francais ', URLify::downcode (' J\'étudie le français ')); - $this->assertEquals ('Lo siento, no hablo espanol.', URLify::downcode ('Lo siento, no hablo español.')); - $this->assertEquals ('FKsPWS', URLify::downcode ('ΦΞΠΏΣ')); - $this->assertEquals ('foo-bar', URLify::filter ('_foo_bar_')); - } - - function test_filter () { - $this->assertEquals ('jetudie-le-francais', URLify::filter (' J\'étudie le français ')); - $this->assertEquals ('lo-siento-no-hablo-espanol', URLify::filter ('Lo siento, no hablo español.')); - $this->assertEquals ('fkspws', URLify::filter ('ΦΞΠΏΣ')); - $this->assertEquals ('da-ban-ruo-jing', URLify::filter('大般若經')); - $this->assertEquals ('test-da-ban-ruo-jing-.txt', URLify::filter('test-大般若經.txt', 60, "", 
$file_name = true)); - $this->assertEquals ('yakrhy-ltoytr', URLify::filter('ياكرهي لتويتر')); - $this->assertEquals ('saaat-25', URLify::filter('ساعت ۲۵')); - $this->assertEquals ('foto.jpg', URLify::filter ('фото.jpg', 60, "", $file_name = true)); - // priorization of language-specific maps - $this->assertEquals ('aouaou', URLify::filter ('ÄÖÜäöü',60,"tr")); - $this->assertEquals ('aeoeueaeoeue', URLify::filter ('ÄÖÜäöü',60,"de")); - - $this->assertEquals ('bobby-mcferrin-dont-worry-be-happy', URLify::filter ("Bobby McFerrin — Don't worry be happy",600,"en")); - // test stripping and conversion of UTF-8 spaces - $this->assertEquals ('xiang-jing-zhen-ren-test-mahito-mukai', URLify::filter('向井 真人test (Mahito Mukai)')); - // Treat underscore as space - $this->assertEquals ('text_with_underscore', URLify::filter('text_with_underscore', 60, "en", true, true, true, false)); - } - - function test_add_chars () { - $this->assertEquals ('? (r) 1/4 1/4 3/4 P', URLify::downcode ('¿ ® ¼ ¼ ¾ ¶')); - URLify::add_chars (array ( - '¿' => '?', '®' => '(r)', '¼' => '1/4', - '¼' => '1/2', '¾' => '3/4', '¶' => 'P' - )); - $this->assertEquals ('? 
(r) 1/2 1/2 3/4 P', URLify::downcode ('¿ ® ¼ ¼ ¾ ¶')); - } - - function test_remove_words () { - $this->assertEquals ('foo-bar', URLify::filter ('foo bar')); - URLify::remove_words (array ('foo', 'bar')); - $this->assertEquals ('', URLify::filter ('foo bar', 200, 'en', false, true)); - } - - function test_unknown_language_code () { - $this->assertEquals ('Lo siento, no hablo espanol.', URLify::downcode ('Lo siento, no hablo español.', -1)); - } - - function test_remove_words_disable () { - URLify::remove_words (array ('foo', 'bar')); - $this->assertEquals ('foo-bar', URLify::filter ('foo bar', 60, '', false, false)); - } -} +/** + * Class URLifyTest + * + * @internal + */ +final class URLifyTest extends \PHPUnit\Framework\TestCase +{ + public function testSlugifyOptions() + { + $input = ' a+A+ - a+A_a _'; + $output = URLify::filter($input, 200, 'de', false, true, true, '_'); + + static::assertSame('a_plus_a_plus_a_plus_a_a', $output); + } + + public function testSlugifyOptionsV2() + { + $input = ' a+A+ - a+A_a _ ♥'; + $output = URLify::filter($input, 200, 'ar', false, true, true, '_'); + + static::assertSame('a_zayd_a_zayd_a_zayd_a_a_hb', $output); + } + + public function testDowncode() + { + $testArray = [ + ' J\'étudie le français ' => ' J\'etudie le francais ', + 'Lo siento, no hablo español.' 
=> 'Lo siento, no hablo espanol.', + '$1 -> %1 -> öäü -> ΦΞΠΏΣ -> 中文空白 -> 💩 ' => ' 1 Dollar -> Prozent 1 -> oeaeue -> FKsPWS -> Zhong Wen Kong Bai -> ', + ' 22.99 € oder $ 19 | 1 $ | $ 1 = foobar' => ' 22 Euro 99 Cent oder 19 Dollar | 1 Dollar | 1 Dollar gleich foobar', + 'זאת השפה העברית.‏' => 'zt hshph h`bryt.', + '𐭠 𐭡 𐭢 𐭣 𐭤 𐭥 𐭦 𐭧 𐭨 𐭩 𐭪 𐭫 𐭬 𐭭 𐭮 𐭯 𐭰 𐭱 𐭲 𐭸 𐭹 𐭺 𐭻 𐭼 𐭽 𐭾 𐭿' => ' ', + 'أحبك' => 'ahbk', + ]; + + foreach ($testArray as $before => $after) { + static::assertSame($after, URLify::downcode($before, 'de'), $before); + static::assertSame($after, URLify::transliterate($before, 'de'), $before); + } + + static::assertSame('FKsPWS, Zhong Wen Kong Bai ', URLify::downcode('ΦΞΠΏΣ, 中文空白', 'de', 'X')); + static::assertSame('FKsPWS, Zhong Wen Kong Bai ', URLify::downcode('ΦΞΠΏΣ, 中文空白', 'de', '')); + } + + public function testRemoveWordsDisable() + { + URLify::remove_words(['foo', 'bar']); + static::assertSame('foo-bar', URLify::filter('foo bar')); + URLify::reset_remove_list(); + } + + public function testRemoveWordsEnabled() + { + URLify::remove_words(['foo', 'bar'], 'en'); + static::assertSame('foo-bar', URLify::filter('foo bar', 10, 'de', false, true)); + URLify::reset_remove_list('en'); + + URLify::remove_words(['foo', 'bar'], 'en'); + static::assertSame('', URLify::filter('foo bar', 10, 'en', false, true)); + URLify::reset_remove_list('en'); + + URLify::remove_words(['foo', 'bar'], 'de'); + static::assertSame('', URLify::filter('foo bar', 10, 'de', false, true)); + URLify::reset_remove_list('de'); + + URLify::remove_words(['foo', 'bär'], 'de'); + static::assertSame('bar', URLify::filter('foo bar', 10, 'de', false, true)); + URLify::reset_remove_list('de'); + } + + public function testDefaultFilter() + { + $testArray = [ + ' J\'étudie le français ' => 'Jetudie-le-francais', + 'Lo siento, no hablo español.' 
=> 'Lo-siento-no-hablo-espanol', + '—ΦΞΠΏΣ—Test—' => 'FKsPWS-Test', + '大般若經' => 'Da-Ban-Ruo-Jing', + 'ياكرهي لتويتر' => 'yakrhy-ltoytr', + 'ساعت ۲۵' => 'saaat-25', + "test\xe2\x80\x99öäü" => 'test-oeaeue', + 'Ɓtest' => 'Btest', + '-ABC-中文空白' => 'ABC-Zhong-Wen-Kong-Bai', + ' ' => '', + '' => '', + '1 ₣ || ä#ü' => '1-French-franc-aeue', + '∆ € $ Þ λ I am A web Develópêr' => 'Unterschied-Euro-Dollar-TH-l-I-am-A-web-Developer', + 'Subject
from a
CMS
' => 'Subject-from-a-CMS', + 'that it\'s \'eleven\' \'o\'clock\'' => 'that-its-eleven-oclock', + ]; + + for ($i = 0; $i < 10; ++$i) { // increase this value to test the performance + foreach ($testArray as $before => $after) { + static::assertSame($after, URLify::filter($before, 200, 'de', false, false, false, '-'), $before); + } + } + + // test static cache + static::assertSame('foo-bar', URLify::filter('_foo_bar_')); + static::assertSame('foo-bar', URLify::filter('_foo_bar_')); + + // test no language + static::assertSame('foo-bar', URLify::filter('_foo_bar_', -1, '')); + + // test no "separator" + static::assertSame('foo-bar', URLify::filter('_foo_bar_', -1, 'de', false, false, false, '')); + + // test new "separator" + static::assertSame('foo_bar', URLify::filter('_foo_bar_', -1, 'de', false, false, false, '_')); + + // test default "separator" + static::assertSame('foo-bar', URLify::filter('_foo_bar_', -1, 'de', false, false, false)); + } + + public function testFilterLanguage() + { + $testArray = [ + 'abz' => ['أبز' => 'ar'], + '' => ['' => 'ar'], + 'testoeaeue' => ['testöäü' => 'de'], + ]; + + foreach ($testArray as $after => $beforeArray) { + foreach ($beforeArray as $before => $lang) { + static::assertSame($after, URLify::filter($before, 60, $lang), $before); + } + } + } + + public function testFilterFile() + { + $testArray = [ + 'test-eDa-Ban-Ruo-Jing-.txt' => "test-\xe9\x00\x0é大般若經.txt", + 'test-Da-Ban-Ruo-Jing-.txt' => 'test-大般若經.txt', + 'foto.jpg' => 'фото.jpg', + 'Foto.jpg' => 'Фото.jpg', + 'oeaeue-test' => 'öäü - test', + 'shdgshdg.png' => 'שדגשדג.png', + 'c-r-aaaaaeaaeOOOOOe141234SSucdthu-.jpg' => '—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–.jpg', + '000-c-c-.txt' => '000—©—©.txt', + '' => ' ', + ]; + + foreach ($testArray as $after => $before) { + static::assertSame($after, URLify::filter($before, 60, 'de', true, false, false, '-'), $before); + } + + // clean file-names + static::assertSame('foto.jpg', URLify::filter('Фото.jpg', 60, 'de', true, false, true)); + } + 
+ public function testFilter() + { + static::assertSame('aeoeueaeoeue-der-und-aeoeueaeoeue', URLify::filter('ÄÖÜäöü&der & ÄÖÜäöü', 60, 'de', false)); + static::assertSame('aeoeueaeoeue-der', URLify::filter('ÄÖÜäöü-der', 60, 'de', false)); + static::assertSame('aeoeueaeoeue der', URLify::filter('ÄÖÜäöü-der', 60, 'de', false, false, true, ' ')); + static::assertSame('aeoeueaeoeue#der', URLify::filter('####ÄÖÜäöü-der', 60, 'de', false, false, true, '#')); + static::assertSame('aeoeueaeoeue', URLify::filter('ÄÖÜäöü-der-die-das', 60, 'de', false, true)); + static::assertSame('bobby-mcferrin-dont-worry-be-happy', URLify::filter('Bobby McFerrin — Don\'t worry be happy', 600, 'en')); + static::assertSame('ouaou', URLify::filter('ÖÜäöü', 60, 'tr')); + static::assertSame('hello-zs-privet', URLify::filter('hello žš, привет', 60, 'ru')); + + // test stripping and conversion of UTF-8 spaces + static::assertSame('xiang-jing-zhen-ren-test-mahito-mukai', URLify::filter('向井 真人test (Mahito Mukai)')); + } + + public function testFilterAllLanguages() + { + static::assertSame('d-shh-l-c-r-aaaaaeaaeoooooe141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'de')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'latin')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'latin_symbols')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'el')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'tr')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'ru')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'uk')); + 
static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'cs')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'pl')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'ro')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'lv')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'lt')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'vn')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'ar')); + static::assertSame('dj-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'sr')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'az')); + static::assertSame('d-shh-l-c-r-aaaaaaaeooooo141234ssucdthu', URLify::filter('Đ-щ-λ—©®±àáâãäåæÒÓÔÕÖ¼½¾§µçðþú–', -1, 'other')); + } + + public function testAddArrayToSeparator() + { + static::assertSame('r-14-14-34-test-p', URLify::filter('¿ ® ¼ ¼ ¾ test ¶')); + + URLify::add_array_to_separator( + [ + '/®/', + '/tester/', + ] + ); + static::assertSame('14-14-34-p-abc', URLify::filter('? ¿ >-< & ® ¼ ¼ ¾ ¶
; ! abc')); + URLify::reset_array_to_separator(); + + // no merge: passing false replaces the default array-to-separator list + + URLify::add_array_to_separator( + [ + '/®/', + '/tester/', + ], + false + ); + static::assertSame('and-amp-14-14-34-p-abc', URLify::filter('? ¿ >-< & ® ¼ ¼ ¾ ¶
; ! abc')); + URLify::reset_array_to_separator(); + } + + public function testAddChars() + { + static::assertSame('? (r) 1/4 1/4 3/4 P', URLify::downcode('¿ ® ¼ ¼ ¾ ¶', 'latin', '?')); + + URLify::add_chars( + [ + '¿' => '?', + '®' => '(r)', + '¼' => '1/4', + '¾' => '3/4', + '¶' => 'p', + ] + ); + static::assertSame('? (r) 1/4 1/4 3/4 p', URLify::downcode('¿ ® ¼ ¼ ¾ ¶')); + + URLify::reset_chars(); + } + + public function testRemoveWords() + { + static::assertSame('foo-bar', URLify::filter('foo bar', 60, 'de', false, true)); -?> + // append (array) v1 + URLify::remove_words( + [ + 'foo', + 'bar', + ], + 'de', + true + ); + static::assertSame('', URLify::filter('foo bar', 60, 'de', false, true)); + + // append (array) v2 + URLify::remove_words( + [ + 'foo/bar', + '\n', + ], + 'de', + true + ); + static::assertSame('lall-n', URLify::filter('foo / bar lall \n', 60, 'de', false, true)); + + // append (string) + URLify::remove_words('lall', 'de', true); + static::assertSame('123', URLify::filter('foo bar lall 123 ', 60, 'de', false, true)); + + // reset + URLify::reset_remove_list('de'); + + // replace + static::assertSame('foo-bar', URLify::filter('foo bar', 60, 'de', false, true)); + URLify::remove_words( + [ + 'foo', + 'bar', + ], + 'de', + false + ); + static::assertSame('', URLify::filter('foo bar', 60, 'de', false, true)); + + // reset + URLify::reset_remove_list('de'); + } + + public function testManyRoundsWithUnknownLanguageCode() + { + $result = []; + for ($i = 0; $i < 100; ++$i) { + $result[] = URLify::downcode('Lo siento, no hablo español.', $i); + } + + foreach ($result as $res) { + static::assertSame('Lo siento, no hablo espanol.', $res); + } + } + + public function testUrlSlug() + { + $tests = [ + ' -ABC-中文空白- ' => 'abc-zhong-wen-kong-bai', + ' - ÖÄÜ- ' => 'oau', + 'öäü' => 'oau', + '' => '', + ' test test' => 'test-test', + 'أبز' => 'abz', + ]; + + foreach ($tests as $before => $after) { + static::assertSame($after, URLify::filter($before, 100, 'latin', 
false, true, true, '-'), 'tested: ' . $before); + } + + $tests = [ + ' -ABC-中文空白- ' => 'abc', + ' - ÖÄÜ- ' => 'oau', + ' öäüabc' => 'oaua', + ' Düsseldorf' => 'da14', // "duss" with fixes UTF-8 see "Portable UTF-8" + 'Abcdef' => 'abcd', + ]; + + foreach ($tests as $before => $after) { + static::assertSame($after, URLify::filter($before, 4, 'latin', false, true, true, '-'), $before); + } + + // --- + + $tests = [ + ' -ABC-中文空白- ' => 'abc', + ' - ÖÄÜ- ' => 'oeae', + ' öäüabc' => 'oeae', + ' Düsseldorf' => 'da14', // "duss" with fixes UTF-8 see "Portable UTF-8" + 'Abcdef' => 'abcd', + ]; + + foreach ($tests as $before => $after) { + static::assertSame($after, URLify::filter($before, 4, 'de', false, true, true, '-'), $before); + } + + // --- + + $tests = [ + 'Facebook bekämpft erstmals Durchsuchungsbefehle' => 'facebook-bekaempft-erstmals-durchsuchungsbefehle', + '123 -ABC-中文空白- ' => '123-abc-zhong-kong-bai', + ' - ÖÄÜ- ' => 'oeaeue', + 'öäü' => 'oeaeue', + '$1 -> %1 -> öäü -> ΦΞΠΏΣ -> 中文空白 -> 💩 ' => '1-dollar-prozent-1-oeaeue-fkspws-zhong-kong-bai', + 'זאת השפה העברית.‏' => 'zt-hshph-h-bryt', + '𐭠 𐭡 𐭢 𐭣 𐭤 𐭥 𐭦 𐭧 𐭨 𐭩 𐭪 𐭫 𐭬 𐭭 𐭮 𐭯 𐭰 𐭱 𐭲 𐭸 𐭹 𐭺 𐭻 𐭼 𐭽 𐭾 𐭿' => '', + 'أحبك' => 'ahbk', + ]; + + foreach ($tests as $before => $after) { + static::assertSame($after, URLify::filter($before, 100, 'de', false, true, true, '-'), $before); + } + + $invalidTest = [ + // Min/max overlong + "\xC0\x80a" => 'Overlong representation of U+0000 | 1', + "\xE0\x80\x80a" => 'Overlong representation of U+0000 | 2', + "\xF0\x80\x80\x80a" => 'Overlong representation of U+0000 | 3', + "\xF8\x80\x80\x80\x80a" => 'Overlong representation of U+0000 | 4', + "\xFC\x80\x80\x80\x80\x80a" => 'Overlong representation of U+0000 | 5', + "\xC1\xBFa" => 'Overlong representation of U+007F | 6', + "\xE0\x9F\xBFa" => 'Overlong representation of U+07FF | 7', + "\xF0\x8F\xBF\xBFa" => 'Overlong representation of U+FFFF | 8', + "a\xDF" => 'Incomplete two byte sequence (missing final byte) | 9', + "a\xEF\xBF" => 
'Incomplete three byte sequence (missing final byte) | 10', + "a\xF4\xBF\xBF" => 'Incomplete four byte sequence (missing final byte) | 11', + // Min/max continuation bytes + "a\x80" => 'Lone 80 continuation byte | 12', + "a\xBF" => 'Lone BF continuation byte | 13', + // Invalid bytes (these can never occur) + "a\xFE" => 'Invalid FE byte | 14', + "a\xFF" => 'Invalid FF byte | 15', + ]; + + foreach ($invalidTest as $test => $note) { + $result = URLify::filter($test); + static::assertTrue($result === 'a' || $result === '', $note); + } + + // --- + + $tests = [ + 'Facebook bekämpft erstmals / Durchsuchungsbefehle' => 'facebook/bekaempft/erstmals/durchsuchungsbefehle', + ' -ABC-中文空白- ' => 'abc/zhong/kong/bai', + ' # - ÖÄÜ- ' => 'oeaeue', + 'öä \nü' => 'oeae/nue', + ]; + + foreach ($tests as $before => $after) { + static::assertSame($after, URLify::filter($before, 100, 'de_ch', false, true, true, '/'), $before); + } + + // --- + + $tests = [ + 'Facebook bekämpft erstmals / Durchsuchungsbefehle' => 'facebook/bekampft/erstmals/durchsuchungsbefehle', + ' -ABC-中文空白- ' => 'abc/zhong/wen/kong/bai', + ' # - ÖÄÜ- ' => 'oau', + 'öä \nü' => 'oa/nu', + ]; + + foreach ($tests as $before => $after) { + static::assertSame($after, URLify::filter($before, 100, 'ru', false, true, true, '/'), $before); + } + } + + public function testGetRemoveList() + { + // reset + URLify::reset_remove_list('de'); + + $test = new URLify(); + + $removeArray = $this->invokeMethod($test, 'get_remove_list', ['de']); + static::assertInternalType('array', $removeArray); + static::assertTrue(\in_array('ein', $removeArray, true)); + + $removeArray = $this->invokeMethod($test, 'get_remove_list', ['']); + static::assertInternalType('array', $removeArray); + static::assertFalse(\in_array('ein', $removeArray, true)); + + // reset + URLify::reset_remove_list('de'); + } + + public function testUnknownLanguageCode() + { + for ($i = 0; $i < 100; ++$i) { + static::assertSame('Lo siento, no hablo espanol.', 
URLify::downcode('Lo siento, no hablo español.', -1)); + URLify::downcode('Lo siento, no hablo español.'); + } + } + + /** + * Call protected/private method of a class. + * + * @param object &$object Instantiated object that we will run method on + * @param string $methodName Method name to call + * @param array $parameters array of parameters to pass into method + * + * @throws \ReflectionException + * + * @return mixed method return + */ + public function invokeMethod(&$object, $methodName, array $parameters = []) + { + $reflection = new \ReflectionClass(\get_class($object)); + $method = $reflection->getMethod($methodName); + $method->setAccessible(true); + + return $method->invokeArgs($object, $parameters); + } +} diff --git a/tests/Utf8SluggerTest.php b/tests/Utf8SluggerTest.php new file mode 100644 index 0000000..f9b5ae8 --- /dev/null +++ b/tests/Utf8SluggerTest.php @@ -0,0 +1,26 @@ +alert(\'lall\')'; + static::assertSame('alert-lall', URLify::filter($str)); + } + + public function testInvalidChar() + { + $str = "tes\xE9ting"; + static::assertSame('testing', URLify::filter($str)); + + //$str = 'W%F6bse'; + //static::assertSame('Woebse', URLify::filter($str, 200, 'de', false, false, false, '-')); + } + + public function testEmptyStr() + { + $str = ''; + static::assertEmpty(URLify::filter($str)); + } + + public function testNulAndNon7Bit() + { + $str = "a\x00ñ\x00c"; + static::assertSame('anc', URLify::filter($str)); + } + + public function testNul() + { + $str = "a\x00b\x00c"; + static::assertSame('abc', URLify::filter($str)); + } + + public function testHtml() + { + $str = '

3 years ago !!!!

'; + static::assertSame('3-years-ago', URLify::filter($str)); + } + + public function testChinese() + { + $str = '活动日起'; + static::assertSame('huo-dong-ri-qi', URLify::filter($str)); + } +} diff --git a/tests/bootstrap.php b/tests/bootstrap.php index d56d466..625db70 100644 --- a/tests/bootstrap.php +++ b/tests/bootstrap.php @@ -1,9 +1,6 @@ -? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ +¡¢£¤¥¦§¨©ª«¬®¯ +°±²³´µ¶·¸¹º»¼½¾¿ +ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ +ÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß +àáâãäåæçèéêëìíîï +ðñòóôõö÷øùúûüýþÿ \ No newline at end of file diff --git a/tests/fixtures/urlify/input/iso-8859-1.txt b/tests/fixtures/urlify/input/iso-8859-1.txt new file mode 100644 index 0000000..495f805 --- /dev/null +++ b/tests/fixtures/urlify/input/iso-8859-1.txt @@ -0,0 +1,12 @@ +!"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ +¡¢£¤¥¦§¨©ª«¬®¯ +°±²³´µ¶·¸¹º»¼½¾¿ +ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ +ÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß +àáâãäåæçèéêëìíîï +ðñòóôõö÷øùúûüýþÿ \ No newline at end of file diff --git a/tests/fixtures/urlify/input/iso-8859-2-1.txt b/tests/fixtures/urlify/input/iso-8859-2-1.txt new file mode 100644 index 0000000..d2a1f6b --- /dev/null +++ b/tests/fixtures/urlify/input/iso-8859-2-1.txt @@ -0,0 +1,12 @@ +!"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ + Ą˘Ł¤ĽŚ§¨ŠŞŤŹŽŻ +°ą˛ł´ľśˇ¸šşťź˝žż +ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎ +ĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß +ŕáâăäĺćçčéęëěíîď +đńňóôőö÷řůúűüýţ˙ \ No newline at end of file diff --git a/tests/fixtures/urlify/input/iso-8859-2.txt b/tests/fixtures/urlify/input/iso-8859-2.txt new file mode 100644 index 0000000..d2a1f6b --- /dev/null +++ b/tests/fixtures/urlify/input/iso-8859-2.txt @@ -0,0 +1,12 @@ +!"#$%&'()*+,-./ +0123456789:;<=>? 
+@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ + Ą˘Ł¤ĽŚ§¨ŠŞŤŹŽŻ +°ą˛ł´ľśˇ¸šşťź˝žż +ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎ +ĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß +ŕáâăäĺćçčéęëěíîď +đńňóôőö÷řůúűüýţ˙ \ No newline at end of file diff --git a/tests/fixtures/urlify/input/iso-8859-3-1.txt b/tests/fixtures/urlify/input/iso-8859-3-1.txt new file mode 100644 index 0000000..103572f --- /dev/null +++ b/tests/fixtures/urlify/input/iso-8859-3-1.txt @@ -0,0 +1,12 @@ +!"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ +Ħ˘£¤Ĥ§¨İŞĞĴŻ +°ħ²³´µĥ·¸ışğĵ½ż +ÀÁÂÄĊĈÇÈÉÊËÌÍÎÏ +ÑÒÓÔĠÖ×ĜÙÚÛÜŬŜß +àáâäċĉçèééêëìíîï +ñòóôġö÷ĝùúûüŭŝ˙ \ No newline at end of file diff --git a/tests/fixtures/urlify/input/iso-8859-3.txt b/tests/fixtures/urlify/input/iso-8859-3.txt new file mode 100644 index 0000000..103572f --- /dev/null +++ b/tests/fixtures/urlify/input/iso-8859-3.txt @@ -0,0 +1,12 @@ +!"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ +Ħ˘£¤Ĥ§¨İŞĞĴŻ +°ħ²³´µĥ·¸ışğĵ½ż +ÀÁÂÄĊĈÇÈÉÊËÌÍÎÏ +ÑÒÓÔĠÖ×ĜÙÚÛÜŬŜß +àáâäċĉçèééêëìíîï +ñòóôġö÷ĝùúûüŭŝ˙ \ No newline at end of file diff --git a/tests/fixtures/urlify/input/iso-8859-4-1.txt b/tests/fixtures/urlify/input/iso-8859-4-1.txt new file mode 100644 index 0000000..5283702 --- /dev/null +++ b/tests/fixtures/urlify/input/iso-8859-4-1.txt @@ -0,0 +1,12 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ +ĄĸŖ¤ĨĻ§¨ŠĒĢŦŽ¯ +°ą˛ŗ´ĩļˇ¸šēģŧŊžŋ +ĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪ +ĐŅŌĶÔÕÖ×ØŲÚÛÜŨŪß +āáâãäåæįčéęëėíîī +đņōķôõö÷øųúûüũū˙ \ No newline at end of file diff --git a/tests/fixtures/urlify/input/iso-8859-4.txt b/tests/fixtures/urlify/input/iso-8859-4.txt new file mode 100644 index 0000000..5283702 --- /dev/null +++ b/tests/fixtures/urlify/input/iso-8859-4.txt @@ -0,0 +1,12 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? 
+@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ +ĄĸŖ¤ĨĻ§¨ŠĒĢŦŽ¯ +°ą˛ŗ´ĩļˇ¸šēģŧŊžŋ +ĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪ +ĐŅŌĶÔÕÖ×ØŲÚÛÜŨŪß +āáâãäåæįčéęëėíîī +đņōķôõö÷øųúûüũū˙ \ No newline at end of file diff --git a/tests/fixtures/urlify/input/japanese.txt b/tests/fixtures/urlify/input/japanese.txt new file mode 100644 index 0000000..f59ee13 --- /dev/null +++ b/tests/fixtures/urlify/input/japanese.txt @@ -0,0 +1,167 @@ +日一国会人年大十二本中長出三同 +時政事自行社見月分議後前民生連 +五発間対上部東者党地合市業内相 +方四定今回新場金員九入選立開手 +米力学問高代明実円関決子動京全 +目表戦経通外最言氏現理調体化田 +当八六約主題下首意法不来作性的 +要用制治度務強気小七成期公持野 +協取都和統以機平総加山思家話世 +受区領多県続進正安設保改数記院 +女初北午指権心界支第産結百派点 +教報済書府活原先共得解名交資予 +川向際査勝面委告軍文反元重近千 +考判認画海参売利組知案道信策集 +在件団別物側任引使求所次水半品 +昨論計死官増係感特情投示変打男 +基私各始島直両朝革価式確村提運 +終挙果西勢減台広容必応演電歳住 +争談能無再位置企真流格有疑口過 +局少放税検藤町常校料沢裁状工建 +語球営空職証土与急止送援供可役 +構木割聞身費付施切由説転食比難 +防補車優夫研収断井何南石足違消 +境神番規術護展態導鮮備宅害配副 +算視条幹独警宮究育席輸訪楽起万 +着乗店述残想線率病農州武声質念 +待試族象銀域助労例衛然早張映限 +親額監環験追審商葉義伝働形景落 +欧担好退準賞訴辺造英被株頭技低 +毎医復仕去姿味負閣韓渡失移差衆 +個門写評課末守若脳極種美岡影命 +含福蔵量望松非撃佐核観察整段横 +融型白深字答夜製票況音申様財港 +識注呼渉達良響阪帰針専推谷古候 +史天階程満敗管値歌買突兵接請器 +士光討路悪科攻崎督授催細効図週 +積丸他及湾録処省旧室憲太橋歩離 +岸客風紙激否周師摘材登系批郎母 +易健黒火戸速存花春飛殺央券赤号 +単盟座青破編捜竹除完降超責並療 +従右修捕隊危採織森競拡故館振給 +屋介読弁根色友苦就迎走販園具左 +異歴辞将秋因献厳馬愛幅休維富浜 +父遺彼般未塁貿講邦舞林装諸夏素 +亡劇河遣航抗冷模雄適婦鉄寄益込 +顔緊類児余禁印逆王返標換久短油 +妻暴輪占宣背昭廃植熱宿薬伊江清 +習険頼僚覚吉盛船倍均億途圧芸許 +皇臨踏駅署抜壊債便伸留罪停興爆 +陸玉源儀波創障継筋狙帯延羽努固 +闘精則葬乱避普散司康測豊洋静善 +逮婚厚喜齢囲卒迫略承浮惑崩順紀 +聴脱旅絶級幸岩練押軽倒了庁博城 +患締等救執層版老令角絡損房募曲 +撤裏払削密庭徒措仏績築貨志混載 +昇池陣我勤為血遅抑幕居染温雑招 +奈季困星傷永択秀著徴誌庫弾償刊 +像功拠香欠更秘拒刑坂刻底賛塚致 +抱繰服犯尾描布恐寺鈴盤息宇項喪 +伴遠養懸戻街巨震願絵希越契掲躍 +棄欲痛触邸依籍汚縮還枚属笑互複 +慮郵束仲栄札枠似夕恵板列露沖探 +逃借緩節需骨射傾届曜遊迷夢巻購 +揮君燃充雨閉緒跡包駐貢鹿弱却端 +賃折紹獲郡併草徹飲貴埼衝焦奪雇 +災浦暮替析預焼簡譲称肉納樹挑章 +臓律誘紛貸至宗促慎控贈智握照宙 +酒俊銭薄堂渋群銃悲秒操携奥診詰 +託晴撮誕侵括掛謝双孝刺到駆寝透 +津壁稲仮暗裂敏鳥純是飯排裕堅訳 +盗芝綱吸典賀扱顧弘看訟戒祉誉歓 +勉奏勧騒翌陽閥甲快縄片郷敬揺免 +既薦隣悩華泉御範隠冬徳皮哲漁杉 +里釈己荒貯硬妥威豪熊歯滞微隆埋 +症暫忠倉昼茶彦肝柱喚沿妙唱祭袋 +阿索誠忘襲雪筆吹訓懇浴俳童宝柄 +驚麻封胸娘砂李塩浩誤剤瀬趣陥斎 +貫仙慰賢序弟旬腕兼聖旨即洗柳舎 +偽較覇兆床畑慣詳毛緑尊抵脅祝礼 +窓柔茂犠旗距雅飾網竜詩昔繁殿濃 +翼牛茨潟敵魅嫌魚斉液貧敷擁衣肩 +圏零酸兄罰怒滅泳礎腐祖幼脚菱荷 +潮梅泊尽杯僕桜滑孤黄煕炎賠句寿 +鋼頑甘臣鎖彩摩浅励掃雲掘縦輝蓄 +軸巡疲稼瞬捨皆砲軟噴沈誇祥牲秩 +帝宏唆鳴阻泰賄撲凍堀腹菊絞乳煙 +縁唯膨矢耐恋塾漏紅慶猛芳懲郊剣 +腰炭踊幌彰棋丁冊恒眠揚冒之勇曽 +械倫陳憶怖犬菜耳潜珍梨仁克岳概 +拘墓黙須偏雰卵遇湖諮狭喫卓干頂 +虫刷亀糧梶湯箱簿炉牧殊殖艦溶輩 +穴奇慢鶴謀暖昌拍朗丈鉱寛覆胞泣 
+涙隔浄匹没暇肺孫貞靖鑑飼陰銘鋭 +随烈尋渕稿枝丹啓也丘棟壌漫玄粘 +悟舗妊塗熟軒旭恩毒騰往豆遂晩狂 +叫栃岐陛緯培衰艇屈径淡抽披廷錦 +准暑拝磯奨妹浸剰胆氷繊駒乾虚棒 +寒孜霊帳悔諭祈惨虐翻墜沼据肥徐 +糖搭姉髪忙盾脈滝拾軌俵妨盧粉擦 +鯨漢糸荘諾雷漂懐勘綿栽才拐笠駄 +添汗冠斜銅鏡聡浪亜覧詐壇勲魔酬 +紫湿曙紋卸奮趙欄逸涯拓眼瓶獄筑 +尚阜彫咲穏顕巧矛垣召欺釣缶萩粧 +隻葛脂粛栗愚蒸嘉遭架篠鬼庶肌稚 +靴菅滋幻煮姫誓耕把践呈疎仰鈍恥 +剛疾征砕謡嫁謙后嘆俣菌鎌巣泥頻 +琴班淵棚潔酷宰廊寂辰隅偶霞伏灯 +柏辛磨碁俗漠邪晶辻麦墨鎮洞履劣 +那殴娠奉憂朴亭姓淳荻筒鼻嶋怪粒 +詞鳩柴偉酔惜穫佳潤悼乏胃該赴桑 +桂髄虎盆晋穂壮堤飢傍疫累痴搬畳 +晃癒桐寸郭机尿凶吐宴鷹賓虜膚陶 +鐘憾畿猪紘磁弥昆粗訂芽尻庄傘敦 +騎寧濯循忍磐猫怠如寮祐鵬塔沸鉛 +珠凝苗獣哀跳灰匠菓垂蛇澄縫僧幾 +眺唐亘呉凡憩鄭芦龍媛溝恭刈睡錯 +伯帽笹穀柿陵霧魂枯弊釧妃舶餓腎 +窮掌麗綾臭釜悦刃縛暦宜盲粋辱毅 +轄猿弦嶌稔窒炊洪摂飽函冗涼桃狩 +舟貝朱渦紳枢碑鍛刀鼓裸鴨符猶塊 +旋弓幣膜扇脇腸憎槽鍋慈皿肯樋楊 +伐駿漬燥糾亮墳坪畜紺慌娯吾椿舌 +羅坊峡俸厘峰圭醸蓮弔乙倶汁尼遍 +堺衡呆薫瓦猟羊窪款閲雀偵喝敢畠 +胎酵憤豚遮扉硫赦挫挟窃泡瑞又慨 +紡恨肪扶戯伍忌濁奔斗蘭蒲迅肖鉢 +朽殻享秦茅藩沙輔曇媒鶏禅嘱胴粕 +冨迭挿湘嵐椎灘堰獅姜絹陪剖譜郁 +悠淑帆暁鷲傑楠笛芥其玲奴誰錠拳 +翔遷拙侍尺峠篤肇渇榎俺劉幡諏叔 +雌亨堪叙酢吟逓痕嶺袖甚喬崔妖琵 +琶聯蘇闇崇漆岬癖愉寅捉礁乃洲屯 +樽樺槙薩姻巌淀麹賭擬塀唇睦閑胡 +幽峻曹哨詠炒屏卑侮鋳抹尉槻隷禍 +蝶酪茎汎頃帥梁逝滴汽謎琢箕匿爪 +芭逗苫鍵襟蛍楢蕉兜寡琉痢庸朋坑 +姑烏藍僑賊搾奄臼畔遼唄孔橘漱呂 +桧拷宋嬢苑巽杜渓翁藝廉牙謹瞳湧 +欣窯褒醜魏篇升此峯殉煩巴禎枕劾 +菩堕丼租檜稜牟桟榊錫荏惧倭婿慕 +廟銚斐罷矯某囚魁薮虹鴻泌於赳漸 +逢凧鵜庵膳蚊葵厄藻萬禄孟鴈狼嫡 +呪斬尖翫嶽尭怨卿串已嚇巳凸暢腫 +粟燕韻綴埴霜餅魯硝牡箸勅芹杏迦 +棺儒鳳馨斑蔭焉慧祇摯愁鷺楼彬袴 +匡眉苅讃尹欽薪湛堆狐褐鴎瀋挺賜 +嵯雁佃綜繕狛壷橿栓翠鮎芯蜜播榛 +凹艶帖伺桶惣股匂鞍蔦玩萱梯雫絆 +錬湊蜂隼舵渚珂煥衷逐斥稀癌峨嘘 +旛篭芙詔皐雛娼篆鮫椅惟牌宕喧佑 +蒋樟耀黛叱櫛渥挨憧濡槍宵襄妄惇 +蛋脩笘宍甫酌蚕壕嬉囃蒼餌簗峙粥 +舘銕鄒蜷暉捧頒只肢箏檀鵠凱彗謄 +諌樫噂脊牝梓洛醍砦丑笏蕨噺抒嗣 +隈叶凄汐絢叩嫉朔蔡膝鍾仇伽夷恣 +瞑畝抄杭寓麺戴爽裾黎惰坐鍼蛮塙 +冴旺葦礒咸萌饗歪冥偲壱瑠韮漕杵 +薔膠允眞蒙蕃呑侯碓茗麓瀕蒔鯉竪 +弧稽瘤澤溥遥蹴或訃矩厦冤剥舜侠 +贅杖蓋畏喉汪猷瑛搜曼附彪撚噛卯 +桝撫喋但溢闊藏浙彭淘剃揃綺徘巷 +竿蟹芋袁舩拭茜凌頬厨犀簑皓甦洸 +毬檄姚蛭婆叢椙轟贋洒貰儲緋貼諜 +鯛蓼甕喘怜溜邑鉾倣碧燈諦煎瓜緻 +哺槌啄穣嗜偕罵酉蹄頚胚牢糞悌吊 +楕鮭乞倹嗅詫鱒蔑轍醤惚廣藁柚舛 +縞謳杞鱗繭釘弛狸壬硯蝦 \ No newline at end of file diff --git a/tests/fixtures/urlify/input/pangrams-1.txt b/tests/fixtures/urlify/input/pangrams-1.txt new file mode 100644 index 0000000..5819f3a --- /dev/null +++ b/tests/fixtures/urlify/input/pangrams-1.txt @@ -0,0 +1,26 @@ +Bulgarian: Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон. +Croatian: Gojazni đačić s biciklom drži hmelj i finu vatu u džepu nošnje. +Czech: Příliš žluťoučký kůň úpěl ďábelské kódy. +Danish: Høj bly gom vandt fræk sexquiz på wc. +English: The quick brown fox jumps over the lazy dog. +Esperanto: Eble ĉiu kvazaŭ-deca fuŝĥoraĵo ĝojigos homtipon. 
+French: Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être déçus en voyant leurs drôles d'œufs abîmés. +Finnish: On sangen hauskaa, että polkupyörä on maanteiden jokapäiväinen ilmiö. +German: Falsches Üben von Xylophonmusik quält jeden größeren Zwerg. +Greek: Ταχίστη αλώπηξ βαφής ψημένη γη, δρασκελίζει υπέρ νωθρού κυνός +Hungarian: Árvíztűrő tükörfúrógép. +Icelandic: Sævör grét áðan því úlpan var ónýt. +Latvian: Četri psihi faķīri vēlu vakarā zāģēja guļbūvei durvis, fonā šņācot mežam. +Lithuanian: Įlinkdama fechtuotojo špaga sublykčiojusi pragręžė apvalų arbūzą . +Maltese: Jekk jogħġbok ibgħat lili xi pangram (ĊċĠġĦħŻż). +Norwegian: Vår sære Zulu fra badeøya spilte jo whist og quickstep i min taxi. +Polish: Pchnąć w tę łódź jeża lub osiem skrzyń fig. +Portuguese: Luís argüia à Júlia que «brações, fé, chá, óxido, pôr, zângão» eram palavras do português. +Romanian: Muzicologă în bej vând whisky și tequila, preț fix. +Russian: Съешь же ещё этих мягких французских булок да выпей чаю. +Sami: Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža. +Slovak: Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na stĺpe sa ďateľ učí kvákať novú ódu o živote. +Spanish: Benjamín pidió una bebida de kiwi y fresa. Noé, sin vergüenza, la más exquisita champaña del menú. +Swedish: Flygande bäckasiner söka strax hwila på mjuka tuvor. +Turkish: Pijamalı hasta yağız şoföre çabucak güvendi. +Ukrainian: Чуєш їх, доцю, га? Кумедна ж ти, прощайся без ґольфів!. \ No newline at end of file diff --git a/tests/fixtures/urlify/input/pangrams.txt b/tests/fixtures/urlify/input/pangrams.txt new file mode 100644 index 0000000..5819f3a --- /dev/null +++ b/tests/fixtures/urlify/input/pangrams.txt @@ -0,0 +1,26 @@ +Bulgarian: Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон. +Croatian: Gojazni đačić s biciklom drži hmelj i finu vatu u džepu nošnje. +Czech: Příliš žluťoučký kůň úpěl ďábelské kódy. +Danish: Høj bly gom vandt fræk sexquiz på wc. 
+English: The quick brown fox jumps over the lazy dog. +Esperanto: Eble ĉiu kvazaŭ-deca fuŝĥoraĵo ĝojigos homtipon. +French: Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être déçus en voyant leurs drôles d'œufs abîmés. +Finnish: On sangen hauskaa, että polkupyörä on maanteiden jokapäiväinen ilmiö. +German: Falsches Üben von Xylophonmusik quält jeden größeren Zwerg. +Greek: Ταχίστη αλώπηξ βαφής ψημένη γη, δρασκελίζει υπέρ νωθρού κυνός +Hungarian: Árvíztűrő tükörfúrógép. +Icelandic: Sævör grét áðan því úlpan var ónýt. +Latvian: Četri psihi faķīri vēlu vakarā zāģēja guļbūvei durvis, fonā šņācot mežam. +Lithuanian: Įlinkdama fechtuotojo špaga sublykčiojusi pragręžė apvalų arbūzą . +Maltese: Jekk jogħġbok ibgħat lili xi pangram (ĊċĠġĦħŻż). +Norwegian: Vår sære Zulu fra badeøya spilte jo whist og quickstep i min taxi. +Polish: Pchnąć w tę łódź jeża lub osiem skrzyń fig. +Portuguese: Luís argüia à Júlia que «brações, fé, chá, óxido, pôr, zângão» eram palavras do português. +Romanian: Muzicologă în bej vând whisky și tequila, preț fix. +Russian: Съешь же ещё этих мягких французских булок да выпей чаю. +Sami: Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža. +Slovak: Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na stĺpe sa ďateľ učí kvákať novú ódu o živote. +Spanish: Benjamín pidió una bebida de kiwi y fresa. Noé, sin vergüenza, la más exquisita champaña del menú. +Swedish: Flygande bäckasiner söka strax hwila på mjuka tuvor. +Turkish: Pijamalı hasta yağız şoföre çabucak güvendi. +Ukrainian: Чуєш їх, доцю, га? Кумедна ж ти, прощайся без ґольфів!. 
\ No newline at end of file diff --git a/tests/fixtures/urlify/input/sample-unicode-chart.txt b/tests/fixtures/urlify/input/sample-unicode-chart.txt new file mode 100644 index 0000000..8054c12 --- /dev/null +++ b/tests/fixtures/urlify/input/sample-unicode-chart.txt @@ -0,0 +1,84 @@ +A 𝐀 𝐴 𝑨 𝖠 𝗔 𝘈 𝘼 𝒜 𝓐 𝔄 𝕬 𝙰 𝔸 +B 𝐁 𝐵 𝑩 𝖡 𝗕 𝘉 𝘽 ℬ 𝓑 𝔅 𝕭 𝙱 𝔹 +C 𝐂 𝐶 𝑪 𝖢 𝗖 𝘊 𝘾 𝒞 𝓒 ℭ 𝕮 𝙲 ℂ +D 𝐃 𝐷 𝑫 𝖣 𝗗 𝘋 𝘿 𝒟 𝓓 𝔇 𝕯 𝙳 𝔻 +E 𝐄 𝐸 𝑬 𝖤 𝗘 𝘌 𝙀 ℰ 𝓔 𝔈 𝕰 𝙴 𝔼 +F 𝐅 𝐹 𝑭 𝖥 𝗙 𝘍 𝙁 ℱ 𝓕 𝔉 𝕱 𝙵 𝔽 +G 𝐆 𝐺 𝑮 𝖦 𝗚 𝘎 𝙂 𝒢 𝓖 𝔊 𝕲 𝙶 𝔾 +H 𝐇 𝐻 𝑯 𝖧 𝗛 𝘏 𝙃 ℋ 𝓗 ℌ 𝕳 𝙷 ℍ +I 𝐈 𝐼 𝑰 𝖨 𝗜 𝘐 𝙄 ℐ 𝓘 ℑ 𝕴 𝙸 𝕀 +J 𝐉 𝐽 𝑱 𝖩 𝗝 𝘑 𝙅 𝒥 𝓙 𝔍 𝕵 𝙹 𝕁 +K 𝐊 𝐾 𝑲 𝖪 𝗞 𝘒 𝙆 𝒦 𝓚 𝔎 𝕶 𝙺 𝕂 +L 𝐋 𝐿 𝑳 𝖫 𝗟 𝘓 𝙇 ℒ 𝓛 𝔏 𝕷 𝙻 𝕃 +M 𝐌 𝑀 𝑴 𝖬 𝗠 𝘔 𝙈 ℳ 𝓜 𝔐 𝕸 𝙼 𝕄 +N 𝐍 𝑁 𝑵 𝖭 𝗡 𝘕 𝙉 𝒩 𝓝 𝔑 𝕹 𝙽 ℕ +O 𝐎 𝑂 𝑶 𝖮 𝗢 𝘖 𝙊 𝒪 𝓞 𝔒 𝕺 𝙾 𝕆 +P 𝐏 𝑃 𝑷 𝖯 𝗣 𝘗 𝙋 𝒫 𝓟 𝔓 𝕻 𝙿 ℙ +Q 𝐐 𝑄 𝑸 𝖰 𝗤 𝘘 𝙌 𝒬 𝓠 𝔔 𝕼 𝚀 ℚ +R 𝐑 𝑅 𝑹 𝖱 𝗥 𝘙 𝙍 ℛ 𝓡 ℜ 𝕽 𝚁 ℝ +S 𝐒 𝑆 𝑺 𝖲 𝗦 𝘚 𝙎 𝒮 𝓢 𝔖 𝕾 𝚂 𝕊 +T 𝐓 𝑇 𝑻 𝖳 𝗧 𝘛 𝙏 𝒯 𝓣 𝔗 𝕿 𝚃 𝕋 +U 𝐔 𝑈 𝑼 𝖴 𝗨 𝘜 𝙐 𝒰 𝓤 𝔘 𝖀 𝚄 𝕌 +V 𝐕 𝑉 𝑽 𝖵 𝗩 𝘝 𝙑 𝒱 𝓥 𝔙 𝖁 𝚅 𝕍 +W 𝐖 𝑊 𝑾 𝖶 𝗪 𝘞 𝙒 𝒲 𝓦 𝔚 𝖂 𝚆 𝕎 +X 𝐗 𝑋 𝑿 𝖷 𝗫 𝘟 𝙓 𝒳 𝓧 𝔛 𝖃 𝚇 𝕏 +Y 𝐘 𝑌 𝒀 𝖸 𝗬 𝘠 𝙔 𝒴 𝓨 𝔜 𝖄 𝚈 𝕐 +Z 𝐙 𝑍 𝒁 𝖹 𝗭 𝘡 𝙕 𝒵 𝓩 ℨ 𝖅 𝚉 ℤ +a 𝐚 𝑎 𝒂 𝖺 𝗮 𝘢 𝙖 𝒶 𝓪 𝔞 𝖆 𝚊 𝕒 +b 𝐛 𝑏 𝒃 𝖻 𝗯 𝘣 𝙗 𝒷 𝓫 𝔟 𝖇 𝚋 𝕓 +c 𝐜 𝑐 𝒄 𝖼 𝗰 𝘤 𝙘 𝒸 𝓬 𝔠 𝖈 𝚌 𝕔 +d 𝐝 𝑑 𝒅 𝖽 𝗱 𝘥 𝙙 𝒹 𝓭 𝔡 𝖉 𝚍 𝕕 +e 𝐞 𝑒 𝒆 𝖾 𝗲 𝘦 𝙚 ℯ 𝓮 𝔢 𝖊 𝚎 𝕖 +f 𝐟 𝑓 𝒇 𝖿 𝗳 𝘧 𝙛 𝒻 𝓯 𝔣 𝖋 𝚏 𝕗 +g 𝐠 𝑔 𝒈 𝗀 𝗴 𝘨 𝙜 ℊ 𝓰 𝔤 𝖌 𝚐 𝕘 +h 𝐡 ℎ 𝒉 𝗁 𝗵 𝘩 𝙝 𝒽 𝓱 𝔥 𝖍 𝚑 𝕙 +i 𝐢 𝑖 𝒊 𝗂 𝗶 𝘪 𝙞 𝒾 𝓲 𝔦 𝖎 𝚒 𝕚 +j 𝐣 𝑗 𝒋 𝗃 𝗷 𝘫 𝙟 𝒿 𝓳 𝔧 𝖏 𝚓 𝕛 +k 𝐤 𝑘 𝒌 𝗄 𝗸 𝘬 𝙠 𝓀 𝓴 𝔨 𝖐 𝚔 𝕜 +l 𝐥 𝑙 𝒍 𝗅 𝗹 𝘭 𝙡 𝓁 𝓵 𝔩 𝖑 𝚕 𝕝 +m 𝐦 𝑚 𝒎 𝗆 𝗺 𝘮 𝙢 𝓂 𝓶 𝔪 𝖒 𝚖 𝕞 +n 𝐧 𝑛 𝒏 𝗇 𝗻 𝘯 𝙣 𝓃 𝓷 𝔫 𝖓 𝚗 𝕟 +o 𝐨 𝑜 𝒐 𝗈 𝗼 𝘰 𝙤 ℴ 𝓸 𝔬 𝖔 𝚘 𝕠 +p 𝐩 𝑝 𝒑 𝗉 𝗽 𝘱 𝙥 𝓅 𝓹 𝔭 𝖕 𝚙 𝕡 +q 𝐪 𝑞 𝒒 𝗊 𝗾 𝘲 𝙦 𝓆 𝓺 𝔮 𝖖 𝚚 𝕢 +r 𝐫 𝑟 𝒓 𝗋 𝗿 𝘳 𝙧 𝓇 𝓻 𝔯 𝖗 𝚛 𝕣 +s 𝐬 𝑠 𝒔 𝗌 𝘀 𝘴 𝙨 𝓈 𝓼 𝔰 𝖘 𝚜 𝕤 +t 𝐭 𝑡 𝒕 𝗍 𝘁 𝘵 𝙩 𝓉 𝓽 𝔱 𝖙 𝚝 𝕥 +u 𝐮 𝑢 𝒖 𝗎 𝘂 𝘶 𝙪 𝓊 𝓾 𝔲 𝖚 𝚞 𝕦 +v 𝐯 𝑣 𝒗 𝗏 𝘃 𝘷 𝙫 𝓋 𝓿 𝔳 𝖛 𝚟 𝕧 +w 𝐰 𝑤 𝒘 𝗐 𝘄 𝘸 𝙬 𝓌 𝔀 𝔴 𝖜 𝚠 𝕨 +x 𝐱 𝑥 𝒙 𝗑 𝘅 𝘹 𝙭 𝓍 𝔁 𝔵 𝖝 𝚡 𝕩 +y 𝐲 𝑦 𝒚 𝗒 𝘆 𝘺 𝙮 𝓎 𝔂 𝔶 𝖞 𝚢 𝕪 +z 𝐳 𝑧 𝒛 𝗓 𝘇 𝘻 𝙯 𝓏 𝔃 𝔷 𝖟 𝚣 𝕫 + +abcdefghijklmnopqrstuvwxyz + +𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳 + +𝖆𝖇𝖈𝖉𝖊𝖋𝖌𝖍𝖎𝖏𝖐𝖑𝖒𝖓𝖔𝖕𝖖𝖗𝖘𝖙𝖚𝖛𝖜𝖝𝖞𝖟 + +𝒂𝒃𝒄𝒅𝒆𝒇𝒈𝒉𝒊𝒋𝒌𝒍𝒎𝒏𝒐𝒑𝒒𝒓𝒔𝒕𝒖𝒗𝒘𝒙𝒚𝒛 + +𝓪𝓫𝓬𝓭𝓮𝓯𝓰𝓱𝓲𝓳𝓴𝓵𝓶𝓷𝓸𝓹𝓺𝓻𝓼𝓽𝓾𝓿𝔀𝔁𝔂𝔃 + +𝚊𝚋𝚌𝚍𝚎𝚏𝚐𝚑𝚒𝚓𝚔𝚕𝚖𝚗𝚘𝚙𝚚𝚛𝚜𝚝𝚞𝚟𝚠𝚡𝚢𝚣 + 
+𝖺𝖻𝖼𝖽𝖾𝖿𝗀𝗁𝗂𝗃𝗄𝗅𝗆𝗇𝗈𝗉𝗊𝗋𝗌𝗍𝗎𝗏𝗐𝗑𝗒𝗓 + +𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇 + +𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯 + +𝘢𝘣𝘤𝘥𝘦𝘧𝘨𝘩𝘪𝘫𝘬𝘭𝘮𝘯𝘰𝘱𝘲𝘳𝘴𝘵𝘶𝘷𝘸𝘹𝘺𝘻 + +ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ + +🅐🅑🅒🅓🅔🅕🅖🅗🅘🅙🅚🅛🅜🅝🅞🅟🅠🅡🅢🅣🅤🅥🅦🅧🅨🅩 + +⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵ + +🇦​🇧​🇨​🇩​🇪​🇫​🇬​🇭​🇮​🇯​🇰​🇱​🇲​🇳​🇴​🇵​🇶​🇷​🇸​🇹​🇺​🇻​🇼​🇽​🇾​🇿​ + +🄰🄱🄲🄳🄴🄵🄶🄷🄸🄹🄺🄻🄼🄽🄾🄿🅀🅁🅂🅃🅄🅅🅆🅇🅈🅉 + +🅰🅱🅲🅳🅴🅵🅶🅷🅸🅹🅺🅻🅼🅽🅾🅿🆀🆁🆂🆃🆄🆅🆆🆇🆈🆉 \ No newline at end of file diff --git a/tests/fixtures/urlify/input/sample-utf-8-bom.txt b/tests/fixtures/urlify/input/sample-utf-8-bom.txt new file mode 100644 index 0000000..8797e12 --- /dev/null +++ b/tests/fixtures/urlify/input/sample-utf-8-bom.txt @@ -0,0 +1,179 @@ +Sanskrit: काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥ +Sanskrit (standard transcription): kācaṃ śaknomyattum; nopahinasti mām. +Classical Greek: ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει. +Greek (monotonic): Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα. +Greek (polytonic): Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα. +Etruscan: (NEEDED) +Latin: Vitrum edere possum; mihi non nocet. +Old French: Je puis mangier del voirre. Ne me nuit. +French: Je peux manger du verre, ça ne me fait pas mal. +Provençal / Occitan: Pòdi manjar de veire, me nafrariá pas. +Québécois: J'peux manger d'la vitre, ça m'fa pas mal. +Walloon: Dji pou magnî do vêre, çoula m' freut nén må. +Champenois: (NEEDED) +Lorrain: (NEEDED) +Picard: Ch'peux mingi du verre, cha m'foé mie n'ma. +Corsican/Corsu: (NEEDED) +Jèrriais: (NEEDED) +Kreyòl Ayisyen (Haitï): Mwen kap manje vè, li pa blese'm. +Basque: Kristala jan dezaket, ez dit minik ematen. +Catalan / Català: Puc menjar vidre, que no em fa mal. +Spanish: Puedo comer vidrio, no me hace daño. +Aragonés: Puedo minchar beire, no me'n fa mal. +Aranés: (NEEDED) +Mallorquín: (NEEDED) +Galician: Eu podo xantar cristais e non cortarme. +European Portuguese: Posso comer vidro, não me faz mal. +Brazilian Portuguese (8): Posso comer vidro, não me machuca. +Caboverdiano/Kabuverdianu (Cape Verde): M' podê cumê vidru, ca ta maguâ-m'. 
+Papiamentu: Ami por kome glas anto e no ta hasimi daño. +Italian: Posso mangiare il vetro e non mi fa male. +Milanese: Sôn bôn de magnà el véder, el me fa minga mal. +Roman: Me posso magna' er vetro, e nun me fa male. +Napoletano: M' pozz magna' o'vetr, e nun m' fa mal. +Venetian: Mi posso magnare el vetro, no'l me fa mae. +Zeneise (Genovese): Pòsso mangiâ o veddro e o no me fà mâ. +Sicilian: Puotsu mangiari u vitru, nun mi fa mali. +Campinadese (Sardinia): (NEEDED) +Lugudorese (Sardinia): (NEEDED) +Romansch (Grischun): Jau sai mangiar vaider, senza che quai fa donn a mai. +Romany / Tsigane: (NEEDED) +Romanian: Pot să mănânc sticlă și ea nu mă rănește. +Esperanto: Mi povas manĝi vitron, ĝi ne damaĝas min. +Pictish: (NEEDED) +Breton: (NEEDED) +Cornish: Mý a yl dybry gwéder hag éf ny wra ow ankenya. +Welsh: Dw i'n gallu bwyta gwydr, 'dyw e ddim yn gwneud dolur i mi. +Manx Gaelic: Foddym gee glonney agh cha jean eh gortaghey mee. +Old Irish (Ogham): ᚛᚛ᚉᚑᚅᚔᚉᚉᚔᚋ ᚔᚈᚔ ᚍᚂᚐᚅᚑ ᚅᚔᚋᚌᚓᚅᚐ᚜ +Old Irish (Latin): Con·iccim ithi nglano. Ním·géna. +Irish: Is féidir liom gloinne a ithe. Ní dhéanann sí dochar ar bith dom. +Ulster Gaelic: Ithim-sa gloine agus ní miste damh é. +Scottish Gaelic: S urrainn dhomh gloinne ithe; cha ghoirtich i mi. +Anglo-Saxon (Runes): ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬ +Anglo-Saxon (Latin): Ic mæg glæs eotan ond hit ne hearmiað me. +Middle English: Ich canne glas eten and hit hirtiþ me nouȝt. +English: I can eat glass and it doesn't hurt me. +English (IPA): [aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː] (Received Pronunciation) +English (Braille): ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑ +Jamaican: Mi kian niam glas han i neba hot mi. +Lalland Scots / Doric: Ah can eat gless, it disnae hurt us. +Glaswegian: (NEEDED) +Gothic (4): 𐌼𐌰𐌲 𐌲𐌻𐌴𐍃 𐌹̈𐍄𐌰𐌽, 𐌽𐌹 𐌼𐌹𐍃 𐍅𐌿 𐌽𐌳𐌰𐌽 𐌱𐍂𐌹𐌲𐌲𐌹𐌸. +Old Norse (Runes): ᛖᚴ ᚷᛖᛏ ᛖᛏᛁ ᚧ ᚷᛚᛖᚱ ᛘᚾ ᚦᛖᛋᛋ ᚨᚧ ᚡᛖ ᚱᚧᚨ ᛋᚨᚱ +Old Norse (Latin): Ek get etið gler án þess að verða sár. +Norsk / Norwegian (Nynorsk): Eg kan eta glas utan å skada meg. 
+Norsk / Norwegian (Bokmål): Jeg kan spise glass uten å skade meg. +Føroyskt / Faroese: Eg kann eta glas, skaðaleysur. +Íslenska / Icelandic: Ég get etið gler án þess að meiða mig. +Svenska / Swedish: Jag kan äta glas utan att skada mig. +Dansk / Danish: Jeg kan spise glas, det gør ikke ondt på mig. +Sønderjysk: Æ ka æe glass uhen at det go mæ naue. +Frysk / Frisian: Ik kin glês ite, it docht me net sear. +Nederlands / Dutch: Ik kan glas eten, het doet mij geen kwaad. +Kirchröadsj/Bôchesserplat: Iech ken glaas èèse, mer 't deet miech jing pieng. +Afrikaans: Ek kan glas eet, maar dit doen my nie skade nie. +Lëtzebuergescht / Luxemburgish: Ech kan Glas iessen, daat deet mir nët wei. +Deutsch / German: Ich kann Glas essen, ohne mir zu schaden. +Ruhrdeutsch: Ich kann Glas verkasematuckeln, ohne dattet mich wat jucken tut. +Langenfelder Platt: Isch kann Jlaas kimmeln, uuhne datt mich datt weh dääd. +Lausitzer Mundart ('Lusatian'): Ich koann Gloos assn und doas dudd merr ni wii. +Odenwälderisch: Iech konn glaasch voschbachteln ohne dass es mir ebbs daun doun dud. +Sächsisch / Saxon: 'sch kann Glos essn, ohne dass'sch mer wehtue. +Pfälzisch: Isch konn Glass fresse ohne dasses mer ebbes ausmache dud. +Schwäbisch / Swabian: I kå Glas frässa, ond des macht mr nix! +Deutsch (Voralberg): I ka glas eassa, ohne dass mar weh tuat. +Bayrisch / Bavarian: I koh Glos esa, und es duard ma ned wei. +Allemannisch: I kaun Gloos essen, es tuat ma ned weh. +Schwyzerdütsch (Zürich): Ich chan Glaas ässe, das schadt mir nöd. +Schwyzerdütsch (Luzern): Ech cha Glâs ässe, das schadt mer ned. +Plautdietsch: (NEEDED) +Hungarian: Meg tudom enni az üveget, nem lesz tőle bajom. +Suomi / Finnish: Voin syödä lasia, se ei vahingoita minua. +Sami (Northern): Sáhtán borrat lása, dat ii leat bávččas. +Erzian: Мон ярсан суликадо, ды зыян эйстэнзэ а ули. +Northern Karelian: Mie voin syvvä lasie ta minla ei ole kipie. +Southern Karelian: Minä voin syvvä st'oklua dai minule ei ole kibie. 
+Vepsian: (NEEDED) +Votian: (NEEDED) +Livonian: (NEEDED) +Estonian: Ma võin klaasi süüa, see ei tee mulle midagi. +Latvian: Es varu ēst stiklu, tas man nekaitē. +Lithuanian: Aš galiu valgyti stiklą ir jis manęs nežeidžia +Old Prussian: (NEEDED) +Sorbian (Wendish): (NEEDED) +Czech: Mohu jíst sklo, neublíží mi. +Slovak: Môžem jesť sklo. Nezraní ma. +Polska / Polish: Mogę jeść szkło i mi nie szkodzi. +Slovenian: Lahko jem steklo, ne da bi mi škodovalo. +Croatian: Ja mogu jesti staklo i ne boli me. +Serbian (Latin): Ja mogu da jedem staklo. +Serbian (Cyrillic): Ја могу да једем стакло. +Macedonian: Можам да јадам стакло, а не ме штета. +Russian: Я могу есть стекло, оно мне не вредит. +Belarusian (Cyrillic): Я магу есці шкло, яно мне не шкодзіць. +Belarusian (Lacinka): Ja mahu jeści škło, jano mne ne škodzić. +Ukrainian: Я можу їсти скло, і воно мені не зашкодить. +Bulgarian: Мога да ям стъкло, то не ми вреди. +Georgian: მინას ვჭამ და არა მტკივა. +Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։ +Albanian: Unë mund të ha qelq dhe nuk më gjen gjë. +Turkish: Cam yiyebilirim, bana zararı dokunmaz. +Turkish (Ottoman): جام ييه بلورم بڭا ضررى طوقونمز +Bangla / Bengali: আমি কাঁচ খেতে পারি, তাতে আমার কোনো ক্ষতি হয় না। +Marathi: मी काच खाऊ शकतो, मला ते दुखत नाही. +Kannada: ನನಗೆ ಹಾನಿ ಆಗದೆ, ನಾನು ಗಜನ್ನು ತಿನಬಹುದು +Hindi: मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती. +Tamil: நான் கண்ணாடி சாப்பிடுவேன், அதனால் எனக்கு ஒரு கேடும் வராது. +Telugu: నేను గాజు తినగలను మరియు అలా చేసినా నాకు ఏమి ఇబ్బంది లేదు +Sinhalese: මට වීදුරු කෑමට හැකියි. එයින් මට කිසි හානියක් සිදු නොවේ. +Urdu(3): میں کانچ کھا سکتا ہوں اور مجھے تکلیف نہیں ہوتی ۔ +Pashto(3): زه شيشه خوړلې شم، هغه ما نه خوږوي +Farsi / Persian(3): .من می توانم بدونِ احساس درد شيشه بخورم +Arabic(3): أنا قادر على أكل الزجاج و هذا لا يؤلمني. +Aramaic: (NEEDED) +Maltese: Nista' niekol il-ħġieġ u ma jagħmilli xejn. +Hebrew(3): אני יכול לאכול זכוכית וזה לא מזיק לי. +Yiddish(3): איך קען עסן גלאָז און עס טוט מיר נישט װײ. 
+Judeo-Arabic: (NEEDED) +Ladino: (NEEDED) +Gǝʼǝz: (NEEDED) +Amharic: (NEEDED) +Twi: Metumi awe tumpan, ɜnyɜ me hwee. +Hausa (Latin): Inā iya taunar gilāshi kuma in gamā lāfiyā. +Hausa (Ajami) (2): إِنا إِىَ تَونَر غِلَاشِ كُمَ إِن غَمَا لَافِىَا +Yoruba(4): Mo lè je̩ dígí, kò ní pa mí lára. +Lingala: Nakokí kolíya biténi bya milungi, ekosála ngáí mabé tɛ́. +(Ki)Swahili: Naweza kula bilauri na sikunyui. +Malay: Saya boleh makan kaca dan ia tidak mencederakan saya. +Tagalog: Kaya kong kumain nang bubog at hindi ako masaktan. +Chamorro: Siña yo' chumocho krestat, ti ha na'lalamen yo'. +Fijian: Au rawa ni kana iloilo, ia au sega ni vakacacani kina. +Javanese: Aku isa mangan beling tanpa lara. +Burmese: က္ယ္ဝန္‌တော္‌၊က္ယ္ဝန္‌မ မ္ယက္‌စားနုိင္‌သည္‌။ ၎က္ရောင္‌့ ထိခုိက္‌မ္ဟု မရ္ဟိပာ။ (9) +Vietnamese (quốc ngữ): Tôi có thể ăn thủy tinh mà không hại gì. +Vietnamese (nôm) (4): 些 𣎏 世 咹 水 晶 𦓡 空 𣎏 害 咦 +Khmer: ខ្ញុំអាចញុំកញ្ចក់បាន ដោយគ្មានបញ្ហារ +Lao: ຂອ້ຍກິນແກ້ວໄດ້ໂດຍທີ່ມັນບໍ່ໄດ້ເຮັດໃຫ້ຂອ້ຍເຈັບ. +Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ +Mongolian (Cyrillic): Би шил идэй чадна, надад хортой биш +Mongolian (Classic) (5): ᠪᠢ ᠰᠢᠯᠢ ᠢᠳᠡᠶᠦ ᠴᠢᠳᠠᠨᠠ ᠂ ᠨᠠᠳᠤᠷ ᠬᠣᠤᠷᠠᠳᠠᠢ ᠪᠢᠰᠢ +Dzongkha: (NEEDED) +Nepali: म काँच खान सक्छू र मलाई केहि नी हुन्‍न् । +Tibetan: ཤེལ་སྒོ་ཟ་ནས་ང་ན་གི་མ་རེད། +Chinese: 我能吞下玻璃而不伤身体。 +Chinese (Traditional): 我能吞下玻璃而不傷身體。 +Taiwanese(6): Góa ē-tàng chia̍h po-lê, mā bē tio̍h-siong. +Japanese: 私はガラスを食べられます。それは私を傷つけません。 +Korean: 나는 유리를 먹을 수 있어요. 그래도 아프지 않아요 +Bislama: Mi save kakae glas, hemi no save katem mi. +Hawaiian: Hiki iaʻu ke ʻai i ke aniani; ʻaʻole nō lā au e ʻeha. +Marquesan: E koʻana e kai i te karahi, mea ʻā, ʻaʻe hauhau. +Inuktitut (10): ᐊᓕᒍᖅ ᓂᕆᔭᕌᖓᒃᑯ ᓱᕋᙱᑦᑐᓐᓇᖅᑐᖓ +Chinook Jargon: Naika məkmək kakshət labutay, pi weyk ukuk munk-sik nay. +Navajo: Tsésǫʼ yishą́ągo bííníshghah dóó doo shił neezgai da. 
+Cherokee (and Cree, Chickasaw, Cree, Micmac, Ojibwa, Lakota, Náhuatl, Quechua, Aymara, and other American languages): (NEEDED) +Garifuna: (NEEDED) +Gullah: (NEEDED) +Lojban: mi kakne le nu citka le blaci .iku'i le se go'i na xrani mi +Nórdicg: Ljœr ye caudran créneþ ý jor cẃran. \ No newline at end of file diff --git a/tests/fixtures/urlify/input/strings-1.txt b/tests/fixtures/urlify/input/strings-1.txt new file mode 100644 index 0000000..a7fdf73 --- /dev/null +++ b/tests/fixtures/urlify/input/strings-1.txt @@ -0,0 +1,20 @@ +The price is 5.99€ +The price is £5.99 +The price is $5.99 +The price is ¥5990 +5.99€ = 5 euros & 99 cents +£5.99 = 5 pounds & 99 pence +$5.99 = 5 dollars & 99 cents +¥5990 = 5990 yen +5.99€ is higher than $5.99 +lorem@ipsum.com +lorem@ipsum +lorem & ipsum +lorem&ipsum at ipsum.com +my username is @lorem +The review gave 5* to the place +10% of 10* is 1* +use lorem@ipsum.com to get a 10% discount +E = mc^2 +The gravity of Earth is 9.81 m/s2 +The #3 comment at @lorem = 10% of *&* \ No newline at end of file diff --git a/tests/fixtures/urlify/input/strings-2.txt b/tests/fixtures/urlify/input/strings-2.txt new file mode 100644 index 0000000..75eacd9 --- /dev/null +++ b/tests/fixtures/urlify/input/strings-2.txt @@ -0,0 +1,15 @@ +ذرزسشصضطظعغػؼؽؾؿ 5.99€ +ذرزسشصضطظعغػؼؽؾؿ £5.99 +׆אבגדהוזחטיךכלםמן $5.99 +日一国会人年大十二本中長出三同 ¥5990 +5.99€ 日一国会人年大十 $5.99 +בגדה@ضطظعغػ.com +年大十@ضطظعغػ +בגדה & 年大十 +国&ם at ضطظعغػ.הוז +my username is @בגדה +The review gave 5* to ظعغػ +use 年大十@ضطظعغػ.הוז to get a 10% discount +日 = הط^2 +ךכלם 国会 غػؼؽ 9.81 m/s2 +The #会 comment at @בגדה = 10% of *&* \ No newline at end of file diff --git a/tests/fixtures/urlify/input/strings-3.txt b/tests/fixtures/urlify/input/strings-3.txt new file mode 100644 index 0000000..75eacd9 --- /dev/null +++ b/tests/fixtures/urlify/input/strings-3.txt @@ -0,0 +1,15 @@ +ذرزسشصضطظعغػؼؽؾؿ 5.99€ +ذرزسشصضطظعغػؼؽؾؿ £5.99 +׆אבגדהוזחטיךכלםמן $5.99 +日一国会人年大十二本中長出三同 ¥5990 +5.99€ 日一国会人年大十 $5.99 +בגדה@ضطظعغػ.com 
+年大十@ضطظعغػ +בגדה & 年大十 +国&ם at ضطظعغػ.הוז +my username is @בגדה +The review gave 5* to ظعغػ +use 年大十@ضطظعغػ.הוז to get a 10% discount +日 = הط^2 +ךכלם 国会 غػؼؽ 9.81 m/s2 +The #会 comment at @בגדה = 10% of *&* \ No newline at end of file diff --git a/tests/profile.php b/tests/profile.php new file mode 100644 index 0000000..865f745 --- /dev/null +++ b/tests/profile.php @@ -0,0 +1,8 @@ +testing
öäü'; + $str_new = URLify::filter($str); +} From d1183d9ae92396de1f944d9d0a991067f1bc415f Mon Sep 17 00:00:00 2001 From: Lars Moelleken Date: Fri, 13 Dec 2019 19:59:18 +0100 Subject: [PATCH 3/3] [+]: cherry-pick performance improvements --- .travis.yml | 2 +- URLify.php | 208 +++++++++++------- composer.json | 2 +- phpcs.php_cs | 2 +- phpstan.neon | 2 +- .../urlify/expected/sample-utf-8-bom.txt | 2 +- tests/profile.php | 5 +- 7 files changed, 135 insertions(+), 88 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5470dc5..954e37b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ php: before_script: - composer install --no-interaction --prefer-source - - if [ "$(phpenv version-name)" == 7.3 ]; then travis_retry composer require phpstan/phpstan-shim; fi + - if [ "$(phpenv version-name)" == 7.3 ]; then travis_retry composer require phpstan/phpstan; fi script: - vendor/bin/phpunit --coverage-text --verbose diff --git a/URLify.php b/URLify.php index e1358cf..db3fad6 100644 --- a/URLify.php +++ b/URLify.php @@ -18,21 +18,21 @@ class URLify * * ISO 639-1 codes: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes * - * @var array + * @var array[] */ public static $maps = []; /** * List of words to remove from URLs. * - * @var array + * @var array[] */ public static $remove_list = []; /** * An array of strings that will convert into the separator-char - used by "URLify::filter()". * - * @var array + * @var string[] */ private static $arrayToSeparator = []; @@ -41,6 +41,10 @@ class URLify * * @param array $array

An array of things that should replaced by the separator.

* @param bool $merge

Keep the previous (default) array-to-separator array.

+ * + * @return void + * + * @psalm-param string[] $array */ public static function add_array_to_separator(array $array, bool $merge = true) { @@ -61,6 +65,10 @@ public static function add_array_to_separator(array $array, bool $merge = true) * * @param array $map * @param string|null $language + * + * @return void + * + * @psalm-param array $map */ public static function add_chars(array $map, string $language = null) { @@ -86,9 +94,9 @@ public static function reset_chars() * $language specifies a priority for a specific language. * The latter is useful if languages have different rules for the same character. * - * @param string $string

The input string.

- * @param string $language

Your primary language.

- * @param string $unknown

Character use if character unknown. (default is ?).

+ * @param string $string

The input string.

+ * @param string $language

Your primary language.

+ * @param string $unknown

Character to use if a character is unknown (default is ?).

* * @return string */ @@ -105,20 +113,18 @@ public static function downcode( } } - $langSpecific = \voku\helper\ASCII::charsArrayWithOneLanguage($language, true); - if (!empty($langSpecific)) { - $string = \str_replace( - $langSpecific['orig'], - $langSpecific['replace'], - $string - ); - } - - foreach (\voku\helper\ASCII::charsArrayWithMultiLanguageValues(true) as $replace => $orig) { - $string = \str_replace($orig, $replace, $string); - } + $string = \voku\helper\ASCII::to_ascii( + $string, + $language, + false, + true + ); - return \voku\helper\ASCII::to_transliterate($string, $unknown, false); + return \voku\helper\ASCII::to_transliterate( + $string, + $unknown, + false + ); } /** @@ -126,20 +132,20 @@ public static function downcode( * * e.g.: "Petty
theft" to "Petty-theft" * - * @param string $string

The text you want to convert.

- * @param int $maxLength

Max. length of the output string, set to "0" (zero) to - * disable it

- * @param string $language

The language you want to convert to.

- * @param bool $fileName

- * Keep the "." from the extension e.g.: "imaäe.jpg" => - * "image.jpg" - *

- * @param bool $removeWords

- * Remove some "words" from the string.
- * Info: Set extra words via remove_words(). - *

- * @param bool $strToLower

Use strtolower() at the end.

- * @param bool|string $separator

Define a new separator for the words.

+ * @param string $string

The text you want to convert.

+ * @param int $maxLength

Max. length of the output string, set to "0" (zero) to + * disable it

+ * @param string $language

The language you want to convert to.

+ * @param bool $fileName

+ * Keep the "." from the extension e.g.: "imaäe.jpg" => + * "image.jpg" + *

+ * @param bool $removeWords

+ * Remove some "words" from the string.
+ * Info: Set extra words via remove_words(). + *

+ * @param bool $strToLower

Use strtolower() at the end.

+ * @param bool|string $separator

Define a new separator for the words.

* * @return string */ @@ -178,22 +184,29 @@ public static function filter( } // remove apostrophes which are not used as quotes around a string - $stringTmp = \preg_replace("/(\w)'(\w)/u", '${1}${2}', $string); - if ($stringTmp !== null) { - $string = (string) $stringTmp; + if (\strpos($string, "'") !== false) { + $stringTmp = \preg_replace("/(\w)'(\w)/u", '${1}${2}', $string); + if ($stringTmp !== null) { + $string = (string) $stringTmp; + } } // replace with $separator - // + - // remove all other html-tags - $string = \strip_tags( - (string) \preg_replace( - self::$arrayToSeparator, - $separator, - $string - ) + $string = (string) \preg_replace( + self::$arrayToSeparator, + $separator, + $string ); + // remove all other html-tags + if ( + \strpos($string, '<') !== false + || + \strpos($string, '>') !== false + ) { + $string = \strip_tags($string); + } + // use special language replacer $string = self::downcode($string, $language); @@ -222,16 +235,16 @@ public static function filter( $string = (string) \preg_replace( [ - '/[^' . $separatorEscaped . $removePatternAddOn . '\-a-zA-Z0-9\s]/u', // 1) remove un-needed chars - '/[\s]+/u', + '/[^' . $separatorEscaped . $removePatternAddOn . '\-a-zA-Z0-9\s]/u', // 2) convert spaces to $separator - $removeWordsSearch, + '/[\s]+/u', // 3) remove some extras words - '/[' . ($separatorEscaped ?: ' ') . ']+/u', + $removeWordsSearch, // 4) remove double $separator's - '/[' . ($separatorEscaped ?: ' ') . ']+$/u', + '/[' . ($separatorEscaped ?: ' ') . ']+/u', // 5) remove $separator at the end + '/[' . ($separatorEscaped ?: ' ') . ']+$/u', ], [ '', @@ -268,7 +281,9 @@ public static function filter( * * @param string|string[] $words * @param string $language - * @param bool $merge

Keep the previous (default) remove-words array.

+ * @param bool $merge

Keep the previous (default) remove-words array.

+ * + * @return void */ public static function remove_words($words, string $language = 'en', bool $merge = true) { @@ -276,7 +291,6 @@ public static function remove_words($words, string $language = 'en', bool $merge $words = [$words]; } - /** @noinspection ForeachSourceInspection */ foreach ($words as $removeWordKey => $removeWord) { $words[$removeWordKey] = \preg_quote($removeWord, '/'); } @@ -295,6 +309,8 @@ public static function remove_words($words, string $language = 'en', bool $merge /** * Reset the internal "self::$arrayToSeparator" to the default values. + * + * @return void */ public static function reset_array_to_separator() { @@ -305,35 +321,12 @@ public static function reset_array_to_separator() ]; } - /** - * @param string $language - * - * @return string - */ - private static function get_language_for_reset_remove_list(string $language): string - { - if ($language === '') { - return ''; - } - - if ( - \strpos($language, '_') === false - && - \strpos($language, '-') === false - ) { - $language = \strtolower($language); - } else { - $regex = '/(?[a-z]{2}).*/i'; - $language = \strtolower((string) \preg_replace($regex, '$1', $language)); - } - - return $language; - } - /** * reset the word-remove-array * * @param string $language + * + * @return void */ public static function reset_remove_list(string $language = 'en') { @@ -377,7 +370,7 @@ public static function transliterate(string $string, string $language = 'en'): s * project, distributed under the Eclipse Public License. 
* Copyright 2012 Pelle Braendgaard * - * @param string $string The string to expand + * @param string $string The string to expand * @param string $language * * @return string The result of expanding the string @@ -389,6 +382,31 @@ protected static function expandString(string $string, string $language = 'en'): return self::expandSymbols($string, $language); } + /** + * @param string $language + * + * @return string + */ + private static function get_language_for_reset_remove_list(string $language) + { + if ($language === '') { + return ''; + } + + if ( + \strpos($language, '_') === false + && + \strpos($language, '-') === false + ) { + $language = \strtolower($language); + } else { + $regex = '/(?[a-z]{2}).*/i'; + $language = \strtolower((string) \preg_replace($regex, '$1', $language)); + } + + return $language; + } + /** * Expands the numeric currencies in euros, dollars, pounds * and yens that the given string may include. @@ -398,8 +416,20 @@ protected static function expandString(string $string, string $language = 'en'): * * @return string */ - private static function expandCurrencies(string $string, string $language = 'en'): string + private static function expandCurrencies(string $string, string $language = 'en') { + if ( + \strpos($string, '€') === false + && + \strpos($string, '$') === false + && + \strpos($string, '£') === false + && + \strpos($string, '¥') === false + ) { + return $string; + } + if ($language === 'de') { return (string) \preg_replace( [ @@ -467,8 +497,24 @@ private static function expandCurrencies(string $string, string $language = 'en' * * @return string */ - private static function expandSymbols(string $string, string $language = 'en'): string + private static function expandSymbols(string $string, string $language = 'en') { + if ( + \strpos($string, '©') === false + && + \strpos($string, '®') === false + && + \strpos($string, '@') === false + && + \strpos($string, '&') === false + && + \strpos($string, '%') === false + && + 
\strpos($string, '=') === false + ) { + return $string; + } + $maps = \voku\helper\ASCII::charsArray(true); return (string) \preg_replace( @@ -497,9 +543,9 @@ private static function expandSymbols(string $string, string $language = 'en'): * * @param string $language * - * @return array + * @return array */ - private static function get_remove_list(string $language = 'en'): array + private static function get_remove_list(string $language = 'en') { // check for language if ($language === '') { diff --git a/composer.json b/composer.json index 3fcfaa1..49fc259 100644 --- a/composer.json +++ b/composer.json @@ -14,7 +14,7 @@ ], "require": { "php": ">=7.0.0", - "voku/portable-ascii": "^1.3", + "voku/portable-ascii": "^1.4", "voku/stop-words": "^2.0" }, "require-dev": { diff --git a/phpcs.php_cs b/phpcs.php_cs index 640d7ac..e702185 100644 --- a/phpcs.php_cs +++ b/phpcs.php_cs @@ -131,7 +131,7 @@ return PhpCsFixer\Config::create() 'no_unset_on_property' => true, 'no_unused_imports' => true, 'no_useless_else' => true, - 'no_useless_return' => true, + 'no_useless_return' => false, // allow void 'no_whitespace_before_comma_in_array' => true, 'no_whitespace_in_blank_line' => true, 'non_printable_character' => true, diff --git a/phpstan.neon b/phpstan.neon index 2f44329..bad6607 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -1,5 +1,5 @@ parameters: - level: 7 + level: max paths: - %currentWorkingDirectory%/ excludes_analyse: diff --git a/tests/fixtures/urlify/expected/sample-utf-8-bom.txt b/tests/fixtures/urlify/expected/sample-utf-8-bom.txt index 4a4dc71..4c0fbc3 100644 --- a/tests/fixtures/urlify/expected/sample-utf-8-bom.txt +++ b/tests/fixtures/urlify/expected/sample-utf-8-bom.txt @@ -126,7 +126,7 @@ kannada-nnge-haani-aagde-naanu-gjnnu-tinbhudu hindi-maain-kaaanca-khaaa-sakataaa-hauun-oura-maujhae-usasae-kaoii-caota-nahaiin-pahauncataii tamil-naannn-knnnnaatti-caappittuveennn-atnnnaal-ennnkku-oru-keettum-vraatu 
telugu-neenu-gaaju-tinglnu-mriyu-alaa-ceesinaa-naaku-eemi-ibbndi-leedu -sinhalese-mtt-viidu-rupee-kaaemtt-haekiyi-eyin-mtt-kisi-haaniyk-sidu-novee +sinhalese-mtt-viiduru-kaaemtt-haekiyi-eyin-mtt-kisi-haaniyk-sidu-novee urdu3-myn-kanch-kha-skta-hon-aor-mghy-tklyf-nhyn-hoty pashto3-zh-shyshh-khorl-shm-hghh-ma-nh-khoroy farsi-persian3-mn-my-toanm-bdoni-ahsas-drd-shyshh-bkhorm diff --git a/tests/profile.php b/tests/profile.php index 865f745..cbb4374 100644 --- a/tests/profile.php +++ b/tests/profile.php @@ -1,8 +1,9 @@ testing
öäü'; $str_new = URLify::filter($str); } +echo 'time: ' . (\microtime(true) - $start);