From 3c80e02cd464ea049e117b423fd48fab294c51a9 Mon Sep 17 00:00:00 2001 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> Date: Thu, 26 Nov 2015 20:29:13 +0000 Subject: [PATCH] Fix auto-callout (?# comment bug. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1611 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Pisar: Ported to 8.38. diff --git a/pcre_compile.c b/pcre_compile.c index 4d3b313..3360a8b 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -4699,6 +4699,23 @@ for (;; ptr++) } } + /* Skip over (?# comments. We need to do this here because we want to know if + the next thing is a quantifier, and these comments may come between an item + and its quantifier. */ + + if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK && + ptr[2] == CHAR_NUMBER_SIGN) + { + ptr += 3; + while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; + if (*ptr == CHAR_NULL) + { + *errorcodeptr = ERR18; + goto FAILED; + } + continue; + } + /* See if the next thing is a quantifier. */ is_quantifier = @@ -6529,21 +6546,6 @@ for (;; ptr++) case CHAR_LEFT_PARENTHESIS: ptr++; - /* First deal with comments. Putting this code right at the start ensures - that comments have no bad side effects. */ - - if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN) - { - ptr += 2; - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - if (*ptr == CHAR_NULL) - { - *errorcodeptr = ERR18; - goto FAILED; - } - continue; - } - /* Now deal with various "verbs" that can be introduced by '*'. */ if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' diff --git a/testdata/testinput2 b/testdata/testinput2 index e2e520f..92e3359 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4217,4 +4217,12 @@ backtracking verbs. --/ /a[[:punct:]b]/BZ +/L(?#(|++<!(2)?/BZ + +/L(?#(|++<!(2)?/BOZ + +/L(?#(|++<!(2)?/BCZ + +/L(?#(|++<!(2)?/BCOZ + /-- End of testinput2 --/ diff --git a/testdata/testinput7 b/testdata/testinput7 index e411a4b..00b9738 100644 --- a/testdata/testinput7 +++ b/testdata/testinput7 @@ -853,4 +853,8 @@ of case for anything other than the ASCII letters. --/ /a[b[:punct:]]/8WBZ +/L(?#(|++<!(2)?/B8COZ + +/L(?#(|++<!(2)?/B8WCZ + /-- End of testinput7 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 85c565d..2cf7a90 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14574,4 +14574,40 @@ No match End ------------------------------------------------------------------ +/L(?#(|++<!(2)?/BZ +------------------------------------------------------------------ + Bra + L?+ + Ket + End +------------------------------------------------------------------ + +/L(?#(|++<!(2)?/BOZ +------------------------------------------------------------------ + Bra + L? + Ket + End +------------------------------------------------------------------ + +/L(?#(|++<!(2)?/BCZ +------------------------------------------------------------------ + Bra + Callout 255 0 14 + L?+ + Callout 255 14 0 + Ket + End +------------------------------------------------------------------ + +/L(?#(|++<!(2)?/BCOZ +------------------------------------------------------------------ + Bra + Callout 255 0 14 + L? + Callout 255 14 0 + Ket + End +------------------------------------------------------------------ + /-- End of testinput2 --/ diff --git a/testdata/testoutput7 b/testdata/testoutput7 index cc9ebdd..fdfff64 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -2348,4 +2348,24 @@ No match End ------------------------------------------------------------------ +/L(?#(|++<!(2)?/B8COZ +------------------------------------------------------------------ + Bra + Callout 255 0 14 + L? + Callout 255 14 0 + Ket + End +------------------------------------------------------------------ + +/L(?#(|++<!(2)?/B8WCZ +------------------------------------------------------------------ + Bra + Callout 255 0 14 + L?+ + Callout 255 14 0 + Ket + End +------------------------------------------------------------------ + /-- End of testinput7 --/ -- 2.4.3