Skip to content
This repository has been archived by the owner on May 19, 2020. It is now read-only.

Commit

Permalink
Merge pull request theiconic#33 from theiconic/feature/handle-combine…
Browse files Browse the repository at this point in the history
…d-initials

Handle combined initials
  • Loading branch information
wyrfel authored Nov 14, 2019
2 parents 1961d10 + 94f2b4c commit 9a54a71
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 19 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,26 @@ This defaults to half the amount of words in the input string,
meaning that effectively the salutation may occur within
the first half of the name parts.

### Adjusting combined initials support
```php
$parser = new TheIconic\NameParser\Parser();
$parser->setMaxCombinedInitials(3);
```
Combined initials are combinations of several
uppercased letters, e.g. `DJ` or `J.T.` without
separating spaces. The parser will treat such sequences
of uppercase letters (with optional dots) as combined
initials and parse them into individual initials.
This value adjusts the maximum number of uppercase letters
in a single name part that are recognised as combined initials.
Parts with more than the specified maximum amount of letters
will not be parsed into initials and hence will most likely
be parsed into first or middle names.

The default value is 2.

To disable combined initials support, set this value to 1.

## Tips
### Provide clean input strings
If your input string consists of more than just the name and
Expand Down
20 changes: 18 additions & 2 deletions src/Mapper/InitialMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@ class InitialMapper extends AbstractMapper
{
protected $matchLastPart = false;

public function __construct(bool $matchLastPart = false)
private $combinedMax = 2;

public function __construct(int $combinedMax = 2, bool $matchLastPart = false)
{
$this->matchLastPart = $matchLastPart;
$this->combinedMax = $combinedMax;
}

/**
Expand All @@ -27,7 +30,9 @@ public function map(array $parts): array
{
$last = count($parts) - 1;

foreach ($parts as $k => $part) {
for ($k = 0; $k < count($parts); $k++) {
$part = $parts[$k];

if ($part instanceof AbstractPart) {
continue;
}
Expand All @@ -36,6 +41,17 @@ public function map(array $parts): array
continue;
}

if (strtoupper($part) === $part) {
$stripped = str_replace('.', '', $part);
$length = strlen($stripped);

if (1 < $length && $length <= $this->combinedMax) {
array_splice($parts, $k, 1, str_split($stripped));
$last = count($parts) - 1;
$part = $parts[$k];
}
}

if ($this->isInitial($part)) {
$parts[$k] = new Initial($part);
}
Expand Down
28 changes: 26 additions & 2 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ class Parser
*/
protected $maxSalutationIndex = 0;

/**
* @var int
*/
protected $maxCombinedInitials = 2;

public function __construct(array $languages = [])
{
if (empty($languages)) {
Expand Down Expand Up @@ -125,7 +130,7 @@ protected function getSecondSegmentParser(): Parser
new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()),
new SuffixMapper($this->getSuffixes(), true, 1),
new NicknameMapper($this->getNicknameDelimiters()),
new InitialMapper(true),
new InitialMapper($this->getMaxCombinedInitials(), true),
new FirstnameMapper(),
new MiddlenameMapper(true),
]);
Expand Down Expand Up @@ -156,7 +161,7 @@ public function getMappers(): array
new NicknameMapper($this->getNicknameDelimiters()),
new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()),
new SuffixMapper($this->getSuffixes()),
new InitialMapper(),
new InitialMapper($this->getMaxCombinedInitials()),
new LastnameMapper($this->getPrefixes()),
new FirstnameMapper(),
new MiddlenameMapper(),
Expand Down Expand Up @@ -299,4 +304,23 @@ public function setMaxSalutationIndex(int $maxSalutationIndex): Parser

return $this;
}

/**
* Returns the configured limit for combined initials, i.e. the maximum
* number of uppercase letters a single name part may contain and still be
* split into individual initials. This value is handed to the
* InitialMapper instances created by this parser.
*
* @return int the current limit (the property is initialised to 2)
*/
public function getMaxCombinedInitials(): int
{
return $this->maxCombinedInitials;
}

/**
* Sets the limit for combined initials, i.e. the maximum number of
* uppercase letters a single name part may contain and still be split
* into individual initials.
*
* The value is stored as given; no range validation is performed here.
*
* @param int $maxCombinedInitials new limit; per the README, a value of 1 disables combined initials support
* @return Parser this parser instance, so calls can be chained
*/
public function setMaxCombinedInitials(int $maxCombinedInitials): Parser
{
$this->maxCombinedInitials = $maxCombinedInitials;

return $this;
}
}
29 changes: 27 additions & 2 deletions tests/Mapper/InitialMapperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,39 @@ public function provider()
new Initial('B'),
],
'arguments' => [
2,
true
],
],
[
'input' => [
'JM',
'Walker',
],
'expectation' => [
new Initial('J'),
new Initial('M'),
'Walker'
]
],
[
'input' => [
'JM',
'Walker',
],
'expectation' => [
'JM',
'Walker'
],
'arguments' => [
1
]
]
];
}

protected function getMapper($matchLastPart = false)
protected function getMapper($maxCombined = 2, $matchLastPart = false)
{
return new InitialMapper($matchLastPart);
return new InitialMapper($maxCombined, $matchLastPart);
}
}
49 changes: 36 additions & 13 deletions tests/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ public function provider()
[
'J.B. Hunt',
[
'firstname' => 'J.B.',
'firstname' => 'J',
'initials' => 'B',
'lastname' => 'Hunt',
]
],
Expand Down Expand Up @@ -534,24 +535,31 @@ public function provider()
'firstname' => 'Etje',
'lastname' => 'Heijdanus-De Boer',
]
]
];
}

/**
* @return array
*/
public function dysfunctionalFirstnameProvider()
{
return [
// fails. both initials should be capitalized
],
[
'JB Hunt',
[
'firstname' => 'JB',
'firstname' => 'J',
'initials' => 'B',
'lastname' => 'Hunt',
]
],
[
'Charles Philip Arthur George Mountbatten-Windsor',
[
'firstname' => 'Charles',
'middlename' => 'Philip Arthur George',
'lastname' => 'Mountbatten-Windsor',
]
],
[
'Ella Marija Lani Yelich-O\'Connor',
[
'firstname' => 'Ella',
'middlename' => 'Marija Lani',
'lastname' => 'Yelich-O\'Connor',
]
]
];
}

Expand Down Expand Up @@ -605,6 +613,21 @@ public function testSetMaxSalutationIndex()
$this->assertSame('Mr.', $parser->parse('Francis Mr')->getSalutation());
}

public function testSetMaxCombinedInitials()
{
    // Every scenario runs against a fresh parser, so the default limit of 2
    // is re-verified before the limit is changed.
    $scenarios = [
        // A limit of 1 switches combined-initials handling off: "DJ" yields no initials.
        ['limit' => 1, 'name' => 'DJ Westbam', 'initials' => ''],
        // A limit of 3 allows the three-letter part "PAG" to be split into P, A and G.
        ['limit' => 3, 'name' => 'Charles PAG Mountbatten-Windsor', 'initials' => 'P A G'],
    ];

    foreach ($scenarios as $scenario) {
        $subject = new Parser();
        $this->assertSame(2, $subject->getMaxCombinedInitials());

        $subject->setMaxCombinedInitials($scenario['limit']);
        $this->assertSame($scenario['limit'], $subject->getMaxCombinedInitials());

        $parsed = $subject->parse($scenario['name']);
        $this->assertSame($scenario['initials'], $parsed->getInitials());
    }
}

public function testParserAndSubparsersProperlyHandleLanguages()
{
$parser = new Parser([
Expand Down

0 comments on commit 9a54a71

Please sign in to comment.