Skip to content

Commit

Permalink
Improve patch in 62a6498 by skipping irrelevant assertions when looking for a starting code unit.
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipHazel committed Dec 8, 2023
1 parent c9e03ce commit 536e6a1
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 4 deletions.
50 changes: 46 additions & 4 deletions src/pcre2_study.c
Original file line number Diff line number Diff line change
Expand Up @@ -1111,15 +1111,57 @@ do
tcode++;
break;

/* For a positive lookahead assertion, inspect what immediately follows.
If the next item is one that sets a mandatory character, skip this
assertion. Otherwise, treat it the same as other bracket groups. */
/* For a positive lookahead assertion, inspect what immediately follows,
ignoring intermediate assertions and callouts. If the next item is one
that sets a mandatory character, skip this assertion. Otherwise, treat it
the same as other bracket groups. */

case OP_ASSERT:
case OP_ASSERT_NA:
ncode = tcode + GET(tcode, 1);
while (*ncode == OP_ALT) ncode += GET(ncode, 1);
ncode += 1 + LINK_SIZE;

/* Skip irrelevant items: any following assertions, word-boundary tests, and callouts. */

for (BOOL done = FALSE; !done;)
{
switch (*ncode)
{
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ASSERT_NA:
case OP_ASSERTBACK_NA:
ncode += GET(ncode, 1);
while (*ncode == OP_ALT) ncode += GET(ncode, 1);
ncode += 1 + LINK_SIZE;
break;

case OP_WORD_BOUNDARY:
case OP_NOT_WORD_BOUNDARY:
case OP_UCP_WORD_BOUNDARY:
case OP_NOT_UCP_WORD_BOUNDARY:
ncode++;
break;

case OP_CALLOUT:
ncode += PRIV(OP_lengths)[OP_CALLOUT];
break;

case OP_CALLOUT_STR:
ncode += GET(ncode, 1 + 2*LINK_SIZE);
break;

default:
done = TRUE;
break;
}
}

/* Now check the next significant item. */

switch(*ncode)
{
default:
Expand Down Expand Up @@ -1149,7 +1191,7 @@ do
case OP_WHITESPACE:
case OP_NOT_WHITESPACE:
tcode = ncode;
continue; /* With the following opcode */
continue; /* With the following significant opcode */
}
/* Fall through */

Expand Down
6 changes: 6 additions & 0 deletions testdata/testinput2
Original file line number Diff line number Diff line change
Expand Up @@ -6087,4 +6087,10 @@ a)"xI
/(?=b(*COMMIT)c|)d/I,no_start_optimize
bd

/a?(?=bc|)d/I,auto_callout
bd

/a?(?=bc|)\bd/I
bd

# End of testinput2
25 changes: 25 additions & 0 deletions testdata/testoutput2
Original file line number Diff line number Diff line change
Expand Up @@ -18004,6 +18004,31 @@ Options: no_start_optimize
bd
No match

/a?(?=bc|)d/I,auto_callout
Capture group count = 0
Options: auto_callout
Starting code units: a d
Last code unit = 'd'
Subject length lower bound = 1
bd
--->bd
+0 ^ a?
+2 ^ (?=
+5 ^ b
+8 ^ )
+9 ^ d
+10 ^^ End of pattern
0: d

/a?(?=bc|)\bd/I
Capture group count = 0
Max lookbehind = 1
Starting code units: a d
Last code unit = 'd'
Subject length lower bound = 1
bd
No match

# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
Expand Down

0 comments on commit 536e6a1

Please sign in to comment.