From ec54c9566b27e4923e51db7e90a000c30dbfe36e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com> Date: Mon, 23 Apr 2012 10:19:12 +0200 Subject: [PATCH] Possessify high ASCII MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ported to pcre-8.12: r962 | ph10 | 2012-04-20 19:28:23 +0200 (Pá, 20 dub 2012) | 3 lines Fix auto-possessifying bugs when PCRE_UCP is not set, but character tables specify characters in the range 127-255 are letters, spaces, etc. --- pcre_compile.c | 24 ++++---- testdata/testinput5 | 36 +++++++++++++ testdata/testoutput5 | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 184 insertions(+), 12 deletions(-) diff --git a/pcre_compile.c b/pcre_compile.c index 84c5a02..8a7e4e0 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -2674,22 +2674,22 @@ if (next >= 0) switch(op_code) When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ case OP_DIGIT: - return next > 127 || (cd->ctypes[next] & ctype_digit) == 0; + return next > 255 || (cd->ctypes[next] & ctype_digit) == 0; case OP_NOT_DIGIT: - return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0; + return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0; case OP_WHITESPACE: - return next > 127 || (cd->ctypes[next] & ctype_space) == 0; + return next > 255 || (cd->ctypes[next] & ctype_space) == 0; case OP_NOT_WHITESPACE: - return next <= 127 && (cd->ctypes[next] & ctype_space) != 0; + return next <= 255 && (cd->ctypes[next] & ctype_space) != 0; case OP_WORDCHAR: - return next > 127 || (cd->ctypes[next] & ctype_word) == 0; + return next > 255 || (cd->ctypes[next] & ctype_word) == 0; case OP_NOT_WORDCHAR: - return next <= 127 && (cd->ctypes[next] & ctype_word) != 0; + return next <= 255 && (cd->ctypes[next] & ctype_word) != 0; case OP_HSPACE: case OP_NOT_HSPACE: @@ -2767,22 +2767,22 @@ switch(op_code) switch(-next) { case ESC_d: - return c > 127 || (cd->ctypes[c] & ctype_digit) == 0; + return c > 255 || (cd->ctypes[c] & ctype_digit) == 0; case ESC_D: - return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0; + return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0; case ESC_s: - return c > 127 || (cd->ctypes[c] & ctype_space) == 0; + return c > 255 || (cd->ctypes[c] & ctype_space) == 0; case ESC_S: - return c <= 127 && (cd->ctypes[c] & ctype_space) != 0; + return c <= 255 && (cd->ctypes[c] & ctype_space) != 0; case ESC_w: - return c > 127 || (cd->ctypes[c] & ctype_word) == 0; + return c > 255 || (cd->ctypes[c] & ctype_word) == 0; case ESC_W: - return c <= 127 && (cd->ctypes[c] & ctype_word) != 0; + return c <= 255 && (cd->ctypes[c] & ctype_word) != 0; case ESC_h: case ESC_H: diff --git a/testdata/testinput5 b/testdata/testinput5 index 5cebd7e..696c43c 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -829,4 +829,40 @@ correctly, but that messes up comparisons). --/ /^\cģ/8 +/\w+\x{C4}/8BZ + a\x{C4}\x{C4} + +/\w+\x{C4}/8BZT1 + a\x{C4}\x{C4} + +/\W+\x{C4}/8BZ + !\x{C4} + +/\W+\x{C4}/8BZT1 + !\x{C4} + +/\W+\x{A1}/8BZ + !\x{A1} + +/\W+\x{A1}/8BZT1 + !\x{A1} + +/X\s+\x{A0}/8BZ + X\x20\x{A0}\x{A0} + +/X\s+\x{A0}/8BZT1 + X\x20\x{A0}\x{A0} + +/\S+\x{A0}/8BZ + X\x{A0}\x{A0} + +/\S+\x{A0}/8BZT1 + X\x{A0}\x{A0} + +/\x{a0}+\s!/8BZ + \x{a0}\x20! + +/\x{a0}+\s!/8BZT1 + \x{a0}\x20! + /-- End of testinput5 --/ diff --git a/testdata/testoutput5 b/testdata/testoutput5 index ed617cc..2e91955 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -2315,4 +2315,140 @@ Error -24 /^\cģ/8 Failed: \c must be followed by an ASCII character at offset 3 +/\w+\x{C4}/8BZ +------------------------------------------------------------------ + Bra + \w++ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4} + +/\w+\x{C4}/8BZT1 +------------------------------------------------------------------ + Bra + \w+ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4}\x{c4} + +/\W+\x{C4}/8BZ +------------------------------------------------------------------ + Bra + \W+ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{C4}/8BZT1 +------------------------------------------------------------------ + Bra + \W++ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{A1}/8BZ +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/\W+\x{A1}/8BZT1 +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/X\s+\x{A0}/8BZ +------------------------------------------------------------------ + Bra + X + \s++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0} + +/X\s+\x{A0}/8BZT1 +------------------------------------------------------------------ + Bra + X + \s+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0}\x{a0} + +/\S+\x{A0}/8BZ +------------------------------------------------------------------ + Bra + \S+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0}\x{a0} + +/\S+\x{A0}/8BZT1 +------------------------------------------------------------------ + Bra + \S++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0} + +/\x{a0}+\s!/8BZ +------------------------------------------------------------------ + Bra + \x{a0}++ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + +/\x{a0}+\s!/8BZT1 +------------------------------------------------------------------ + Bra + \x{a0}+ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + /-- End of testinput5 --/ -- 1.7.7.6