diff --git a/CHANGELOG.md b/CHANGELOG.md
index ded651b..6acfb9c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,14 @@ Changelog
 
 --------
 
+## 3.1.1 - 2024-10-18
+
+### Fixed
+
+* [Tokenizer] Correct a state bug that made it impossible to tokenize/parse multiple messages ([#14](https://github.com/estratocloud/edifact/issues/14)).
+
+--------
+
 ## 3.1.0 - 2024-01-19
 
 ### Added
diff --git a/src/Tokenizer.php b/src/Tokenizer.php
index da8f54b..9164e99 100644
--- a/src/Tokenizer.php
+++ b/src/Tokenizer.php
@@ -57,9 +57,11 @@ final class Tokenizer implements TokenizerInterface
     public function getTokens(string $message, ControlCharactersInterface $characters): array
     {
         $this->message = $message;
+        $this->position = 0;
         $this->characters = $characters;
         $this->char = "";
         $this->string = "";
+        $this->isEscaped = false;
 
         $this->readNextChar();
 
diff --git a/tests/TokenizerTest.php b/tests/TokenizerTest.php
index 2ce5fe0..b6eaa83 100644
--- a/tests/TokenizerTest.php
+++ b/tests/TokenizerTest.php
@@ -47,6 +47,30 @@ public function testBasic(): void
     }
 
 
+    /**
+     * Regression test for https://github.com/estratocloud/edifact/issues/14
+     */
+    public function testMultiple(): void
+    {
+        $this->assertTokens("RFF+PD:50515", [
+            new Token(Token::CONTENT, "RFF"),
+            new Token(Token::DATA_SEPARATOR, "+"),
+            new Token(Token::CONTENT, "PD"),
+            new Token(Token::COMPONENT_SEPARATOR, ":"),
+            new Token(Token::CONTENT, "50515"),
+        ]);
+
+        # Ensure we can use the same tokenizer instance for multiple messages
+        $this->assertTokens("RFF+PD:50515", [
+            new Token(Token::CONTENT, "RFF"),
+            new Token(Token::DATA_SEPARATOR, "+"),
+            new Token(Token::CONTENT, "PD"),
+            new Token(Token::COMPONENT_SEPARATOR, ":"),
+            new Token(Token::CONTENT, "50515"),
+        ]);
+    }
+
+
     public function testEscape(): void
     {
         $this->assertTokens("RFF+PD?:5", [