Skip to content
This repository has been archived by the owner on Jul 25, 2022. It is now read-only.

Commit

Permalink
Merge pull request #9 from true/encoding
Browse files Browse the repository at this point in the history
Character encoding
  • Loading branch information
renan committed Mar 12, 2015
2 parents 7d6bfbb + 92db63d commit 3e2d539
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 28 deletions.
15 changes: 2 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ composer require true/punycode:~1.0
// Import Punycode
use True\Punycode;

// Use UTF-8 as the encoding
mb_internal_encoding('utf-8');

$Punycode = new Punycode();
var_dump($Punycode->encode('renangonçalves.com'));
// outputs: xn--renangonalves-pgb.com
Expand All @@ -38,18 +35,10 @@ var_dump($Punycode->decode('xn--renangonalves-pgb.com'));

### 1. What is this library for?

This library converts a UTF-8 encoded domain name to an IDNA ASCII form and vice versa.


### 2. Do I need to use UTF-8?

Yes, domain names should be UTF-8 encoded.

Unless your application is not aimed at international users, you should already be using a Unicode charset.
Take your time to read [The Absolute Minimum Every Software Developer Must Know About Unicode](http://www.joelonsoftware.com/articles/Unicode.html).
This library converts a Unicode-encoded domain name to an IDNA ASCII form and vice versa.


### 3. Why should I use this instead of [PHP's IDN Functions](http://php.net/manual/en/ref.intl.idn.php)?
### 2. Why should I use this instead of [PHP's IDN Functions](http://php.net/manual/en/ref.intl.idn.php)?

If you can compile the needed dependencies (intl, libidn) there is not much difference.
But if you want to write portable code between hosts (including Windows and Mac OS), or can't install PECL extensions, this is the right library for you.
25 changes: 21 additions & 4 deletions src/Punycode.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,23 @@ class Punycode
'4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
);

/**
* Character encoding passed to the mbstring functions (mb_strlen, mb_substr)
* when measuring and slicing strings.
*
* @var string
*/
protected $encoding;

/**
* Constructor
*
* Stores the character encoding that this instance hands to the mbstring
* functions (mb_strlen, mb_substr), so callers do not have to change the
* global mb_internal_encoding() setting.
*
* @param string $encoding Character encoding of the strings to process; defaults to 'UTF-8'
*/
public function __construct($encoding = 'UTF-8')
{
$this->encoding = $encoding;
}

/**
* Encode a domain to its Punycode version
*
Expand Down Expand Up @@ -94,7 +111,7 @@ protected function _encodePart($input)
sort($codePoints['nonBasic']);

$i = 0;
$length = mb_strlen($input);
$length = mb_strlen($input, $this->encoding);
while ($h < $length) {
$m = $codePoints['nonBasic'][$i++];
$delta = $delta + ($m - $n) * ($h + 1);
Expand Down Expand Up @@ -194,7 +211,7 @@ protected function _decodePart($input)
$bias = $this->_adapt($i - $oldi, ++$outputLength, ($oldi === 0));
$n = $n + (int) ($i / $outputLength);
$i = $i % ($outputLength);
$output = mb_substr($output, 0, $i) . $this->_codePointToChar($n) . mb_substr($output, $i, $outputLength - 1);
$output = mb_substr($output, 0, $i, $this->encoding) . $this->_codePointToChar($n) . mb_substr($output, $i, $outputLength - 1, $this->encoding);

$i++;
}
Expand Down Expand Up @@ -260,9 +277,9 @@ protected function _codePoints($input)
'nonBasic' => array(),
);

$length = mb_strlen($input);
$length = mb_strlen($input, $this->encoding);
for ($i = 0; $i < $length; $i++) {
$char = mb_substr($input, $i, 1);
$char = mb_substr($input, $i, 1, $this->encoding);
$code = $this->_charToCodePoint($char);
if ($code < 128) {
$codePoints['all'][] = $codePoints['basic'][] = $code;
Expand Down
15 changes: 4 additions & 11 deletions tests/PunycodeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,6 @@
class PunycodeTest extends \PHPUnit_Framework_TestCase
{

/**
* Test fixture setup: runs the parent setUp() and then sets the mbstring
* internal encoding to UTF-8, so that mb_* calls made without an explicit
* encoding argument operate on UTF-8 during the tests.
*
* @return void
*/
public function setUp()
{
parent::setUp();

// Global mbstring setting; affects every mb_* call that omits the encoding argument.
mb_internal_encoding('utf-8');
}

/**
* Test encoding Punycode
*
Expand Down Expand Up @@ -125,6 +114,10 @@ public function domainNamesProvider()
'guangdong.广东',
'guangdong.xn--xhq521b',
),
array(
'gwóźdź.pl',
'xn--gwd-hna98db.pl',
),
);
}
}

0 comments on commit 3e2d539

Please sign in to comment.