diff --git a/parser.js b/parser.js index 62ef2ee..2e65701 100644 --- a/parser.js +++ b/parser.js @@ -851,16 +851,6 @@ var res, match, from = pos; - if (hasUnicodeFlag) { - if (res = matchReg(/^\d/)) { - if (res[0] !== "0" || (res = matchReg(/^\d/)) ) { - bail("Invalid decimal escape in unicode mode", null, from, pos); - } - return createEscaped('null', 0x0000, '0', 1); - } - return false; - } - if (res = matchReg(/^(?!0)\d+/)) { match = res[0]; var refIdx = parseInt(res[0], 10); @@ -878,6 +868,16 @@ // a second time with the total normal-groups. backrefDenied.push(refIdx); + // \1 octal escapes are disallowed in unicode mode, but they might + // be references to groups which haven't been parsed yet. + // We must parse a second time to determine if \1 is a reference + // or an octal escape, and then we can report the error. + if (firstIteration) { + shouldReparse = true; + } else { + bailOctalEscapeIfUnicode(from, pos); + } + // Reset the position again, as maybe only parts of the previous // matched numbers are actual octal numbers. E.g. in '019' only // the '01' should be matched. @@ -904,6 +904,9 @@ // /\091/.exec('\091')[0].length === 3 else if (res = matchReg(/^[0-7]{1,3}/)) { match = res[0]; + if (match !== '0') { + bailOctalEscapeIfUnicode(from, pos); + } if (/^0{1,3}$/.test(match)) { // If they are all zeros, then only take the first one. 
return createEscaped('null', 0x0000, '0', match.length); @@ -914,6 +917,12 @@ return false; } + function bailOctalEscapeIfUnicode(from, pos) { + if (hasUnicodeFlag || hasUnicodeSetFlag) { + bail("Invalid decimal escape in unicode mode", null, from, pos); + } + } + function parseCharacterClassEscape() { // CharacterClassEscape :: one of d D s S w W var res; @@ -1500,6 +1509,7 @@ var backrefDenied = []; var closedCaptureCounter = 0; var firstIteration = true; + var shouldReparse = false; var hasUnicodeFlag = (flags || "").indexOf("u") !== -1; var hasUnicodeSetFlag = (flags || "").indexOf("v") !== -1; var pos = 0; @@ -1532,13 +1542,14 @@ // the total number of capture groups set. // // SEE: https://github.com/jviereck/regjsparser/issues/70 - for (var i = 0; i < backrefDenied.length; i++) { - if (backrefDenied[i] <= closedCaptureCounter) { - // Parse the input a second time. - pos = 0; - firstIteration = false; - return parseDisjunction(); - } + shouldReparse = shouldReparse || backrefDenied.some(function (ref) { + return ref <= closedCaptureCounter; + }); + if (shouldReparse) { + // Parse the input a second time. + pos = 0; + firstIteration = false; + return parseDisjunction(); } return result; diff --git a/test/test-data-unicode-set.json b/test/test-data-unicode-set.json index c3735f8..dfcb83e 100644 --- a/test/test-data-unicode-set.json +++ b/test/test-data-unicode-set.json @@ -1493,5 +1493,11 @@ 9 ], "raw": "[^\\q{AB}]" + }, + "\\1": { + "type": "error", + "name": "SyntaxError", + "message": "Invalid decimal escape in unicode mode at position 1\n \\1\n ^", + "input": "\\1" } } diff --git a/test/test-data-unicode.json b/test/test-data-unicode.json index 2405068..e97d778 100644 --- a/test/test-data-unicode.json +++ b/test/test-data-unicode.json @@ -1140,5 +1140,93 @@ 2 ], "raw": "\\0" + }, + "(.)\\1": { + "type": "alternative", + "body": [ + { + "type": "group", + "behavior": "normal", + "body": [ + { + "type": "dot", + "range": [ + 1, + 2 + ], + "raw": "." 
+ } + ], + "range": [ + 0, + 3 + ], + "raw": "(.)" + }, + { + "type": "reference", + "matchIndex": 1, + "range": [ + 3, + 5 + ], + "raw": "\\1" + } + ], + "range": [ + 0, + 5 + ], + "raw": "(.)\\1" + }, + "\\1(.)": { + "type": "alternative", + "body": [ + { + "type": "reference", + "matchIndex": 1, + "range": [ + 0, + 2 + ], + "raw": "\\1" + }, + { + "type": "group", + "behavior": "normal", + "body": [ + { + "type": "dot", + "range": [ + 3, + 4 + ], + "raw": "." + } + ], + "range": [ + 2, + 5 + ], + "raw": "(.)" + } + ], + "range": [ + 0, + 5 + ], + "raw": "\\1(.)" + }, + "(.)\\2": { + "type": "error", + "name": "SyntaxError", + "message": "Invalid decimal escape in unicode mode at position 4\n (.)\\2\n ^", + "input": "(.)\\2" + }, + "\\2(.)": { + "type": "error", + "name": "SyntaxError", + "message": "Invalid decimal escape in unicode mode at position 1\n \\2(.)\n ^", + "input": "\\2(.)" } }