From 86315f7a1cbd041949832bc20b992fcdf2314a51 Mon Sep 17 00:00:00 2001 From: Nicolas Giraud Date: Thu, 19 Sep 2024 21:18:11 +0200 Subject: [PATCH] Fix #578: lexer wrongly interprets ".e[0-9]" as a number with scientific notation. (#579) * Add invalid number on lexer to show it is wrongly detected as a number. * Fix the lexer about parsing invalid numbers with updated unit tests. * Remove forgotten debug function. * Fix forgotten statement on parseNumber to move from state 10 to state 4, causing wrong lexing process. * Fix linters and ignore new psalm issues. * Add test case with parser for issue #578. Fixes #578 --- psalm-baseline.xml | 4 +- src/Lexer.php | 22 +- tests/Parser/LoadStatementTest.php | 1 + tests/data/lexer/lexNumber.in | 2 +- tests/data/lexer/lexNumber.out | 57 +++- tests/data/parser/parseLoad8.in | 8 + tests/data/parser/parseLoad8.out | 502 +++++++++++++++++++++++++++++ 7 files changed, 583 insertions(+), 13 deletions(-) create mode 100644 tests/data/parser/parseLoad8.in create mode 100644 tests/data/parser/parseLoad8.out diff --git a/psalm-baseline.xml b/psalm-baseline.xml index 2079bcdbe..ed08d689f 100644 --- a/psalm-baseline.xml +++ b/psalm-baseline.xml @@ -671,7 +671,7 @@ $this->last $this->last - + $this->str[$this->last + 1] $this->str[$this->last++] $this->str[$this->last] @@ -713,6 +713,8 @@ $this->str[$this->last] $this->str[$this->last] $this->str[$this->last] + $this->str[$this->last] + $this->str[$this->last] $lastToken diff --git a/src/Lexer.php b/src/Lexer.php index e8586b88f..5a051a28d 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -812,7 +812,7 @@ public function parseNumber() // 1 --------------------[ + or - ]-------------------> 1 // 1 -------------------[ 0x or 0X ]------------------> 2 // 1 --------------------[ 0 to 9 ]-------------------> 3 - // 1 -----------------------[ . ]---------------------> 4 + // 1 -----------------------[ . 
]---------------------> 10 // 1 -----------------------[ b ]---------------------> 7 // // 2 --------------------[ 0 to F ]-------------------> 2 @@ -831,11 +831,16 @@ public function parseNumber() // 8 --------------------[ 0 or 1 ]-------------------> 8 // 8 -----------------------[ ' ]---------------------> 9 // + // 10 -------------------[ 0 to 9 ]-------------------> 4 + // // State 1 may be reached by negative numbers. // State 2 is reached only by hex numbers. // State 4 is reached only by float numbers. // State 5 is reached only by numbers in approximate form. // State 7 is reached only by numbers in bit representation. + // State 10 is a forced proxy to state 4 ensuring a starting dot (= "0.something") precedes a digit, and not "e" + // or "E" causing wrongly interpreted scientific notation (".e[0 to 9]" is invalid). Such invalid notation could + // break the lexer when table names under a given database context start with ".e[0-9]". // // Valid final states are: 2, 3, 4 and 6. Any parsing that finished in a // state other than these is invalid. @@ -858,7 +863,7 @@ public function parseNumber() } elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') { $state = 3; } elseif ($this->str[$this->last] === '.') { - $state = 4; + $state = 10; } elseif ($this->str[$this->last] === 'b') { $state = 7; } elseif ($this->str[$this->last] !== '+') { @@ -885,7 +890,7 @@ public function parseNumber() ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z') || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z') ) { - // A number can't be directly followed by a letter + // A number can't be directly followed by a letter $state = -$state; } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') { // Just digits and `.`, `e` and `E` are valid characters. 
@@ -899,7 +904,7 @@ public function parseNumber() ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z') || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z') ) { - // A number can't be directly followed by a letter + // A number can't be directly followed by a letter $state = -$state; } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') { // Just digits, `e` and `E` are valid characters. @@ -916,7 +921,7 @@ public function parseNumber() ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z') || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z') ) { - // A number can't be directly followed by a letter + // A number can't be directly followed by a letter $state = -$state; } else { break; @@ -941,6 +946,13 @@ public function parseNumber() } } elseif ($state === 9) { break; + } elseif ($state === 10) { + $flags |= Token::FLAG_NUMBER_FLOAT; + if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') { + break; + } + + $state = 4; } $token .= $this->str[$this->last]; diff --git a/tests/Parser/LoadStatementTest.php b/tests/Parser/LoadStatementTest.php index 75bb21a48..7c623bbe2 100644 --- a/tests/Parser/LoadStatementTest.php +++ b/tests/Parser/LoadStatementTest.php @@ -38,6 +38,7 @@ public static function loadProvider(): array ['parser/parseLoad5'], ['parser/parseLoad6'], ['parser/parseLoad7'], + ['parser/parseLoad8'], ['parser/parseLoadErr1'], ['parser/parseLoadErr2'], ['parser/parseLoadErr3'], diff --git a/tests/data/lexer/lexNumber.in b/tests/data/lexer/lexNumber.in index a0fff528e..0f92bbb63 100644 --- a/tests/data/lexer/lexNumber.in +++ b/tests/data/lexer/lexNumber.in @@ -1,3 +1,3 @@ SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10'; -- invalid numbers -SELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA; \ No newline at end of file +SELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA, .e4; diff --git a/tests/data/lexer/lexNumber.out 
b/tests/data/lexer/lexNumber.out index 5c6988dd1..1b43ba952 100644 --- a/tests/data/lexer/lexNumber.out +++ b/tests/data/lexer/lexNumber.out @@ -1,10 +1,10 @@ { - "query": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA;", + "query": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA, .e4;\n", "lexer": { "@type": "PhpMyAdmin\\SqlParser\\Lexer", - "str": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA;", - "len": 176, - "last": 176, + "str": "SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, +0xfFA, -0xFFa, -0xfFA, 1e-10, 1e10, .5e10, b'10';\n-- invalid numbers\nSELECT 12ex10, b'15', 0XFfA, -0XFfA, +0XFfA, .e4;\n", + "len": 182, + "last": 182, "list": { "@type": "PhpMyAdmin\\SqlParser\\TokensList", "tokens": [ @@ -665,6 +665,42 @@ "flags": 0, "position": 170 }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": ",", + "value": ",", + "keyword": null, + "type": 2, + "flags": 16, + "position": 175 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 176 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": ".", + "value": ".", + "keyword": null, + "type": 2, + "flags": 16, + "position": 177 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "e4", + "value": "e4", + "keyword": null, + "type": 0, + "flags": 0, + "position": 178 + }, { "@type": "PhpMyAdmin\\SqlParser\\Token", "token": ";", @@ -672,7 +708,16 @@ "keyword": null, "type": 9, "flags": 0, - "position": 175 + "position": 180 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "\n", + 
"value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 181 }, { "@type": "PhpMyAdmin\\SqlParser\\Token", @@ -684,7 +729,7 @@ "position": null } ], - "count": 75, + "count": 80, "idx": 0 }, "delimiter": ";", diff --git a/tests/data/parser/parseLoad8.in b/tests/data/parser/parseLoad8.in new file mode 100644 index 000000000..46e6d2ac3 --- /dev/null +++ b/tests/data/parser/parseLoad8.in @@ -0,0 +1,8 @@ +-- Query from https://github.com/phpmyadmin/sql-parser/issues/578 +-- Issue was that Lexer detected ".e1" as number token. + +LOAD DATA LOCAL INFILE '/home/user/myloadfile.csv' +IGNORE INTO TABLE erp.e1_table +FIELDS TERMINATED BY '\t' +LINES TERMINATED BY '\n' +IGNORE 0 LINES; diff --git a/tests/data/parser/parseLoad8.out b/tests/data/parser/parseLoad8.out new file mode 100644 index 000000000..fca14e8df --- /dev/null +++ b/tests/data/parser/parseLoad8.out @@ -0,0 +1,502 @@ +{ + "query": "-- Query from https://github.com/phpmyadmin/sql-parser/issues/578\n-- Issue was that Lexer detected \".e1\" as number token.\n\nLOAD DATA LOCAL INFILE '/home/user/myloadfile.csv'\nIGNORE INTO TABLE erp.e1_table\nFIELDS TERMINATED BY '\\t'\nLINES TERMINATED BY '\\n'\nIGNORE 0 LINES;\n", + "lexer": { + "@type": "PhpMyAdmin\\SqlParser\\Lexer", + "str": "-- Query from https://github.com/phpmyadmin/sql-parser/issues/578\n-- Issue was that Lexer detected \".e1\" as number token.\n\nLOAD DATA LOCAL INFILE '/home/user/myloadfile.csv'\nIGNORE INTO TABLE erp.e1_table\nFIELDS TERMINATED BY '\\t'\nLINES TERMINATED BY '\\n'\nIGNORE 0 LINES;\n", + "len": 272, + "last": 272, + "list": { + "@type": "PhpMyAdmin\\SqlParser\\TokensList", + "tokens": [ + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "-- Query from https://github.com/phpmyadmin/sql-parser/issues/578", + "value": "-- Query from https://github.com/phpmyadmin/sql-parser/issues/578", + "keyword": null, + "type": 4, + "flags": 4, + "position": 0 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "\n", + 
"value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 65 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "-- Issue was that Lexer detected \".e1\" as number token.", + "value": "-- Issue was that Lexer detected \".e1\" as number token.", + "keyword": null, + "type": 4, + "flags": 4, + "position": 66 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "\n\n", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 121 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "LOAD DATA", + "value": "LOAD DATA", + "keyword": "LOAD DATA", + "type": 1, + "flags": 7, + "position": 123 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 132 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "LOCAL", + "value": "LOCAL", + "keyword": "LOCAL", + "type": 1, + "flags": 1, + "position": 133 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 138 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "INFILE", + "value": "INFILE", + "keyword": "INFILE", + "type": 1, + "flags": 3, + "position": 139 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 145 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "'/home/user/myloadfile.csv'", + "value": "/home/user/myloadfile.csv", + "keyword": null, + "type": 7, + "flags": 1, + "position": 146 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "\n", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 173 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "IGNORE", + "value": "IGNORE", + "keyword": "IGNORE", + "type": 1, + "flags": 3, + "position": 174 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", 
+ "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 180 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "INTO", + "value": "INTO", + "keyword": "INTO", + "type": 1, + "flags": 3, + "position": 181 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 185 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "TABLE", + "value": "TABLE", + "keyword": "TABLE", + "type": 1, + "flags": 3, + "position": 186 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 191 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "erp", + "value": "erp", + "keyword": null, + "type": 0, + "flags": 0, + "position": 192 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": ".", + "value": ".", + "keyword": null, + "type": 2, + "flags": 16, + "position": 195 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "e1_table", + "value": "e1_table", + "keyword": null, + "type": 0, + "flags": 0, + "position": 196 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "\n", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 204 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "FIELDS", + "value": "FIELDS", + "keyword": "FIELDS", + "type": 1, + "flags": 1, + "position": 205 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 211 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "TERMINATED BY", + "value": "TERMINATED BY", + "keyword": "TERMINATED BY", + "type": 1, + "flags": 7, + "position": 212 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 225 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + 
"token": "'\\t'", + "value": "\t", + "keyword": null, + "type": 7, + "flags": 1, + "position": 226 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "\n", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 230 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "LINES", + "value": "LINES", + "keyword": "LINES", + "type": 1, + "flags": 3, + "position": 231 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 236 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "TERMINATED BY", + "value": "TERMINATED BY", + "keyword": "TERMINATED BY", + "type": 1, + "flags": 7, + "position": 237 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 250 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "'\\n'", + "value": "\n", + "keyword": null, + "type": 7, + "flags": 1, + "position": 251 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "\n", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 255 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "IGNORE", + "value": "IGNORE", + "keyword": "IGNORE", + "type": 1, + "flags": 3, + "position": 256 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 262 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "0", + "value": 0, + "keyword": null, + "type": 6, + "flags": 0, + "position": 263 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": " ", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 264 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "LINES", + "value": "LINES", + "keyword": "LINES", + "type": 1, + "flags": 3, + "position": 265 + }, + { + "@type": 
"PhpMyAdmin\\SqlParser\\Token", + "token": ";", + "value": ";", + "keyword": null, + "type": 9, + "flags": 0, + "position": 270 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": "\n", + "value": " ", + "keyword": null, + "type": 3, + "flags": 0, + "position": 271 + }, + { + "@type": "PhpMyAdmin\\SqlParser\\Token", + "token": null, + "value": null, + "keyword": null, + "type": 9, + "flags": 0, + "position": null + } + ], + "count": 42, + "idx": 42 + }, + "delimiter": ";", + "delimiterLen": 1, + "strict": false, + "errors": [] + }, + "parser": { + "@type": "PhpMyAdmin\\SqlParser\\Parser", + "list": { + "@type": "@1" + }, + "statements": [ + { + "@type": "PhpMyAdmin\\SqlParser\\Statements\\LoadStatement", + "file_name": { + "@type": "PhpMyAdmin\\SqlParser\\Components\\Expression", + "database": null, + "table": null, + "column": null, + "expr": "'/home/user/myloadfile.csv'", + "alias": null, + "function": null, + "subquery": null, + "file": "/home/user/myloadfile.csv" + }, + "table": { + "@type": "PhpMyAdmin\\SqlParser\\Components\\Expression", + "database": "erp", + "table": "e1_table", + "column": null, + "expr": "erp.e1_table", + "alias": null, + "function": null, + "subquery": null + }, + "partition": null, + "charset_name": null, + "fields_options": { + "@type": "PhpMyAdmin\\SqlParser\\Components\\OptionsArray", + "options": { + "1": { + "name": "TERMINATED BY", + "equals": false, + "expr": { + "@type": "PhpMyAdmin\\SqlParser\\Components\\Expression", + "database": null, + "table": null, + "column": "\t", + "expr": "'\\t'", + "alias": null, + "function": null, + "subquery": null + }, + "value": "'\\t'" + } + } + }, + "fields_keyword": "FIELDS", + "lines_options": { + "@type": "PhpMyAdmin\\SqlParser\\Components\\OptionsArray", + "options": { + "2": { + "name": "TERMINATED BY", + "equals": false, + "expr": { + "@type": "PhpMyAdmin\\SqlParser\\Components\\Expression", + "database": null, + "table": null, + "column": "\n", + "expr": "'\\n'", + "alias": 
null, + "function": null, + "subquery": null + }, + "value": "'\\n'" + } + } + }, + "col_name_or_user_var": null, + "set": null, + "ignore_number": { + "@type": "PhpMyAdmin\\SqlParser\\Components\\Expression", + "database": null, + "table": null, + "column": null, + "expr": "0", + "alias": null, + "function": null, + "subquery": null + }, + "replace_ignore": "IGNORE", + "lines_rows": "LINES", + "options": { + "@type": "PhpMyAdmin\\SqlParser\\Components\\OptionsArray", + "options": { + "2": "LOCAL" + } + }, + "first": 0, + "last": 40 + } + ], + "brackets": 0, + "strict": false, + "errors": [] + }, + "errors": { + "lexer": [], + "parser": [] + } +} \ No newline at end of file