From 484e68d7976d2d8ea2988e449e34234e235ce302 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com> Date: Fri, 2 Dec 2011 13:11:55 +0100 Subject: [PATCH] Fix caseless match if cases differ in encoding length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: r778 | ph10 | 2011-12-01 18:38:47 +0100 (Čt, 01 pro 2011) | 3 lines Fix bug with caseless matching of characters of different lengths when the shorter is right at the end of the subject. Petr Pisar: Changelog entry removed. --- pcre_exec.c | 32 ++++++++++++++++---------------- testdata/testinput6 | 14 ++++++++++++++ testdata/testoutput6 | 22 ++++++++++++++++++++++ 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/pcre_exec.c b/pcre_exec.c index caf5fc3..2b7c5bd 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -432,7 +432,7 @@ returns a negative (error) response, the outer incarnation must also return the same response. */ /* These macros pack up tests that are used for partial matching, and which -appears several times in the code. We set the "hit end" flag if the pointer is +appear several times in the code. We set the "hit end" flag if the pointer is at the end of the subject and also past the start of the subject (i.e. something has been matched). For hard partial matching, we then return immediately. The second one is used when we already know we are past the end of @@ -2743,31 +2743,36 @@ for (;;) } break; - /* Match a single character, caselessly */ + /* Match a single character, caselessly. If we are at the end of the + subject, give up immediately. */ case OP_CHARNC: + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + MRRETURN(MATCH_NOMATCH); + } + #ifdef SUPPORT_UTF8 if (utf8) { length = 1; ecode++; GETCHARLEN(fc, ecode, length); - - if (length > md->end_subject - eptr) - { - CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ - MRRETURN(MATCH_NOMATCH); - } - + /* If the pattern character's value is < 128, we have only one byte, and - can use the fast lookup table. */ + we know that its other case must also be one byte long, so we can use the + fast lookup table. We know that there is at least one byte left in the + subject. */ if (fc < 128) { if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); } - /* Otherwise we must pick up the subject character */ + /* Otherwise we must pick up the subject character. Note that we cannot + use the value of "length" to check for sufficient bytes left, because the + other case of the character may have more or fewer bytes. */ else { @@ -2792,11 +2797,6 @@ for (;;) /* Non-UTF-8 mode */ { - if (md->end_subject - eptr < 1) - { - SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ - MRRETURN(MATCH_NOMATCH); - } if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); ecode += 2; } diff --git a/testdata/testinput6 b/testdata/testinput6 index 503a5bc..c92140c 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -802,4 +802,18 @@ ** Failers a\xFCb +/ⱥ/8i + ⱥ + Ⱥx + Ⱥ + +/[ⱥ]/8i + ⱥ + Ⱥx + Ⱥ + +/Ⱥ/8i + Ⱥ + ⱥ + /-- End of testinput6 --/ diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 6a9ec83..0ada170 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -1353,4 +1353,26 @@ No match a\xFCb No match +/ⱥ/8i + ⱥ + 0: \x{2c65} + Ⱥx + 0: \x{23a} + Ⱥ + 0: \x{23a} + +/[ⱥ]/8i + ⱥ + 0: \x{2c65} + Ⱥx + 0: \x{23a} + Ⱥ + 0: \x{23a} + +/Ⱥ/8i + Ⱥ + 0: \x{23a} + ⱥ + 0: \x{2c65} + /-- End of testinput6 --/ -- 1.7.7.4