Skip to content

Commit

Permalink
Fix #578: lexer wrongly interprets ".e[0-9]" as a number with scienti…
Browse files Browse the repository at this point in the history
…fic notation. (#579)

* Add invalid number on lexer to show it is wrongly detected as a number.

* Fix the lexer about parsing invalid numbers with updated unit tests.

* Remove forgotten debug function.

* Fix forgotten statement on parseNumber to move from state 10 to state 4, causing wrong lexing process.

* Fix linters and ignore new psalm issues.

* Add test case with parser for issue #578.

Fixes #578
  • Loading branch information
niconoe- authored Sep 19, 2024
1 parent a1c555a commit 86315f7
Show file tree
Hide file tree
Showing 7 changed files with 583 additions and 13 deletions.
4 changes: 3 additions & 1 deletion psalm-baseline.xml
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@
<code>$this-&gt;last</code>
<code>$this-&gt;last</code>
</LoopInvalidation>
<MixedArrayAccess occurrences="41">
<MixedArrayAccess occurrences="43">
<code>$this-&gt;str[$this-&gt;last + 1]</code>
<code>$this-&gt;str[$this-&gt;last++]</code>
<code>$this-&gt;str[$this-&gt;last]</code>
Expand Down Expand Up @@ -713,6 +713,8 @@
<code>$this-&gt;str[$this-&gt;last]</code>
<code>$this-&gt;str[$this-&gt;last]</code>
<code>$this-&gt;str[$this-&gt;last]</code>
<code>$this-&gt;str[$this-&gt;last]</code>
<code>$this-&gt;str[$this-&gt;last]</code>
</MixedArrayAccess>
<MixedAssignment occurrences="2">
<code>$lastToken</code>
Expand Down
22 changes: 17 additions & 5 deletions src/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ public function parseNumber()
// 1 --------------------[ + or - ]-------------------> 1
// 1 -------------------[ 0x or 0X ]------------------> 2
// 1 --------------------[ 0 to 9 ]-------------------> 3
// 1 -----------------------[ . ]---------------------> 4
// 1 -----------------------[ . ]---------------------> 10
// 1 -----------------------[ b ]---------------------> 7
//
// 2 --------------------[ 0 to F ]-------------------> 2
Expand All @@ -831,11 +831,16 @@ public function parseNumber()
// 8 --------------------[ 0 or 1 ]-------------------> 8
// 8 -----------------------[ ' ]---------------------> 9
//
// 10 -------------------[ 0 to 9 ]-------------------> 4
//
// State 1 may be reached by negative numbers.
// State 2 is reached only by hex numbers.
// State 4 is reached only by float numbers.
// State 5 is reached only by numbers in approximate form.
// State 7 is reached only by numbers in bit representation.
// State 10 is an intermediate state before state 4: it ensures that a leading dot (as in ".5", i.e. "0.5") is
// followed by a digit, and not by "e" or "E", which would wrongly be read as scientific notation (".e[0-9]" is
// not a valid number). Without it, the lexer broke on qualified names such as "db.e1_table", where the table
// name following the database dot starts with "e[0-9]".
//
// Valid final states are: 2, 3, 4 and 6. Any parsing that finished in a
// state other than these is invalid.
Expand All @@ -858,7 +863,7 @@ public function parseNumber()
} elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') {
$state = 3;
} elseif ($this->str[$this->last] === '.') {
$state = 4;
$state = 10;
} elseif ($this->str[$this->last] === 'b') {
$state = 7;
} elseif ($this->str[$this->last] !== '+') {
Expand All @@ -885,7 +890,7 @@ public function parseNumber()
($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
|| ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
) {
// A number can't be directly followed by a letter
// A number can't be directly followed by a letter
$state = -$state;
} elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
// Just digits and `.`, `e` and `E` are valid characters.
Expand All @@ -899,7 +904,7 @@ public function parseNumber()
($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
|| ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
) {
// A number can't be directly followed by a letter
// A number can't be directly followed by a letter
$state = -$state;
} elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
// Just digits, `e` and `E` are valid characters.
Expand All @@ -916,7 +921,7 @@ public function parseNumber()
($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
|| ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
) {
// A number can't be directly followed by a letter
// A number can't be directly followed by a letter
$state = -$state;
} else {
break;
Expand All @@ -941,6 +946,13 @@ public function parseNumber()
}
} elseif ($state === 9) {
break;
} elseif ($state === 10) {
$flags |= Token::FLAG_NUMBER_FLOAT;
if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
break;
}

$state = 4;
}

$token .= $this->str[$this->last];
Expand Down
1 change: 1 addition & 0 deletions tests/Parser/LoadStatementTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public static function loadProvider(): array
['parser/parseLoad5'],
['parser/parseLoad6'],
['parser/parseLoad7'],
['parser/parseLoad8'],
['parser/parseLoadErr1'],
['parser/parseLoadErr2'],
['parser/parseLoadErr3'],
Expand Down
2 changes: 1 addition & 1 deletion tests/data/lexer/lexNumber.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';
-- invalid numbers
SELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA;
SELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA, .e4;
57 changes: 51 additions & 6 deletions tests/data/lexer/lexNumber.out
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"query": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA;",
"query": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA, .e4;\n",
"lexer": {
"@type": "PhpMyAdmin\\SqlParser\\Lexer",
"str": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA;",
"len": 176,
"last": 176,
"str": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA, .e4;\n",
"len": 182,
"last": 182,
"list": {
"@type": "PhpMyAdmin\\SqlParser\\TokensList",
"tokens": [
Expand Down Expand Up @@ -665,14 +665,59 @@
"flags": 0,
"position": 170
},
{
"@type": "PhpMyAdmin\\SqlParser\\Token",
"token": ",",
"value": ",",
"keyword": null,
"type": 2,
"flags": 16,
"position": 175
},
{
"@type": "PhpMyAdmin\\SqlParser\\Token",
"token": " ",
"value": " ",
"keyword": null,
"type": 3,
"flags": 0,
"position": 176
},
{
"@type": "PhpMyAdmin\\SqlParser\\Token",
"token": ".",
"value": ".",
"keyword": null,
"type": 2,
"flags": 16,
"position": 177
},
{
"@type": "PhpMyAdmin\\SqlParser\\Token",
"token": "e4",
"value": "e4",
"keyword": null,
"type": 0,
"flags": 0,
"position": 178
},
{
"@type": "PhpMyAdmin\\SqlParser\\Token",
"token": ";",
"value": ";",
"keyword": null,
"type": 9,
"flags": 0,
"position": 175
"position": 180
},
{
"@type": "PhpMyAdmin\\SqlParser\\Token",
"token": "\n",
"value": " ",
"keyword": null,
"type": 3,
"flags": 0,
"position": 181
},
{
"@type": "PhpMyAdmin\\SqlParser\\Token",
Expand All @@ -684,7 +729,7 @@
"position": null
}
],
"count": 75,
"count": 80,
"idx": 0
},
"delimiter": ";",
Expand Down
8 changes: 8 additions & 0 deletions tests/data/parser/parseLoad8.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Query from https://github.com/phpmyadmin/sql-parser/issues/578
-- Issue was that Lexer detected ".e1" as number token.

LOAD DATA LOCAL INFILE '/home/user/myloadfile.csv'
IGNORE INTO TABLE erp.e1_table
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
IGNORE 0 LINES;
Loading

0 comments on commit 86315f7

Please sign in to comment.