Sophie

Sophie

distrib > Fedora > 16 > x86_64 > by-pkgid > 20ff4ff75433449222836d4ab1064b5b > files > 4

pcre-8.12-9.fc16.src.rpm

From 484e68d7976d2d8ea2988e449e34234e235ce302 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com>
Date: Fri, 2 Dec 2011 13:11:55 +0100
Subject: [PATCH] Fix caseless match if cases differ in encoding length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From:
r778 | ph10 | 2011-12-01 18:38:47 +0100 (Čt, 01 pro 2011) | 3 lines

Fix bug with caseless matching, in UTF-8 mode, of a character whose two
cases have encodings of different lengths, when the shorter one is right at
the end of the subject.

Petr Pisar: Changelog entry removed from this patch.
---
 pcre_exec.c          |   32 ++++++++++++++++----------------
 testdata/testinput6  |   14 ++++++++++++++
 testdata/testoutput6 |   22 ++++++++++++++++++++++
 3 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/pcre_exec.c b/pcre_exec.c
index caf5fc3..2b7c5bd 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -432,7 +432,7 @@ returns a negative (error) response, the outer incarnation must also return the
 same response. */
 
 /* These macros pack up tests that are used for partial matching, and which
-appears several times in the code. We set the "hit end" flag if the pointer is
+appear several times in the code. We set the "hit end" flag if the pointer is
 at the end of the subject and also past the start of the subject (i.e.
 something has been matched). For hard partial matching, we then return
 immediately. The second one is used when we already know we are past the end of
@@ -2743,31 +2743,36 @@ for (;;)
       }
     break;
 
-    /* Match a single character, caselessly */
+    /* Match a single character, caselessly. If we are at the end of the 
+    subject, give up immediately. */
 
     case OP_CHARNC:
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL(); 
+      MRRETURN(MATCH_NOMATCH); 
+      }   
+ 
 #ifdef SUPPORT_UTF8
     if (utf8)
       {
       length = 1;
       ecode++;
       GETCHARLEN(fc, ecode, length);
-
-      if (length > md->end_subject - eptr)
-        {
-        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
-        MRRETURN(MATCH_NOMATCH);
-        }
-
+ 
       /* If the pattern character's value is < 128, we have only one byte, and
-      can use the fast lookup table. */
+      we know that its other case must also be one byte long, so we can use the
+      fast lookup table. We know that there is at least one byte left in the 
+      subject. */
 
       if (fc < 128)
         {
         if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
         }
 
-      /* Otherwise we must pick up the subject character */
+      /* Otherwise we must pick up the subject character. Note that we cannot
+      use the value of "length" to check for sufficient bytes left, because the
+      other case of the character may have more or fewer bytes.  */
 
       else
         {
@@ -2792,11 +2797,6 @@ for (;;)
 
     /* Non-UTF-8 mode */
       {
-      if (md->end_subject - eptr < 1)
-        {
-        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
-        MRRETURN(MATCH_NOMATCH);
-        }
       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
       ecode += 2;
       }
diff --git a/testdata/testinput6 b/testdata/testinput6
index 503a5bc..c92140c 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -802,4 +802,18 @@
     ** Failers 
     a\xFCb   
 
+/ⱥ/8i
+    ⱥ
+    Ⱥx 
+    Ⱥ 
+
+/[ⱥ]/8i
+    ⱥ
+    Ⱥx 
+    Ⱥ 
+
+/Ⱥ/8i
+    Ⱥ
+    ⱥ
+
 /-- End of testinput6 --/
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 6a9ec83..0ada170 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -1353,4 +1353,26 @@ No match
     a\xFCb   
 No match
 
+/ⱥ/8i
+    ⱥ
+ 0: \x{2c65}
+    Ⱥx 
+ 0: \x{23a}
+    Ⱥ 
+ 0: \x{23a}
+
+/[ⱥ]/8i
+    ⱥ
+ 0: \x{2c65}
+    Ⱥx 
+ 0: \x{23a}
+    Ⱥ 
+ 0: \x{23a}
+
+/Ⱥ/8i
+    Ⱥ
+ 0: \x{23a}
+    ⱥ
+ 0: \x{2c65}
+
 /-- End of testinput6 --/
-- 
1.7.7.4