diff -uNrp icu-53.1.orig/source/i18n/regexcmp.cpp icu-53.1/source/i18n/regexcmp.cpp --- icu-53.1.orig/source/i18n/regexcmp.cpp 2015-01-27 16:01:44.006129489 -0500 +++ icu-53.1/source/i18n/regexcmp.cpp 2015-01-27 15:59:24.375802044 -0500 @@ -2133,6 +2133,10 @@ void RegexCompile::handleCloseParen() { int32_t patEnd = fRXPat->fCompiledPat->size() - 1; int32_t minML = minMatchLength(fMatchOpenParen, patEnd); int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); + if (URX_TYPE(maxML) != 0) { + error(U_REGEX_LOOK_BEHIND_LIMIT); + break; + } if (maxML == INT32_MAX) { error(U_REGEX_LOOK_BEHIND_LIMIT); break; @@ -2166,6 +2170,10 @@ void RegexCompile::handleCloseParen() { int32_t patEnd = fRXPat->fCompiledPat->size() - 1; int32_t minML = minMatchLength(fMatchOpenParen, patEnd); int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); + if (URX_TYPE(maxML) != 0) { + error(U_REGEX_LOOK_BEHIND_LIMIT); + break; + } if (maxML == INT32_MAX) { error(U_REGEX_LOOK_BEHIND_LIMIT); break; @@ -2329,7 +2337,15 @@ UBool RegexCompile::compileInlineInterva int32_t topOfBlock = blockTopLoc(FALSE); if (fIntervalUpper == 0) { // Pathological case. Attempt no matches, as if the block doesn't exist. + // Discard the generated code for the block. + // If the block included parens, discard the info pertaining to them as well. fRXPat->fCompiledPat->setSize(topOfBlock); + if (fMatchOpenParen >= topOfBlock) { + fMatchOpenParen = -1; + } + if (fMatchCloseParen >= topOfBlock) { + fMatchCloseParen = -1; + } return TRUE; } diff -uNrp icu-53.1.orig/source/i18n/regexcmp.h icu-53.1/source/i18n/regexcmp.h --- icu-53.1.orig/source/i18n/regexcmp.h 2015-01-27 16:01:38.186104393 -0500 +++ icu-53.1/source/i18n/regexcmp.h 2015-01-27 15:59:24.375802044 -0500 @@ -182,7 +182,9 @@ private: int32_t fMatchOpenParen; // The position in the compiled pattern // of the slot reserved for a state save // at the start of the most recently processed - // parenthesized block. + // parenthesized block. 
Updated when processing + // a close to the location for the corresponding open. + int32_t fMatchCloseParen; // The position in the pattern of the first // location after the most recently processed // parenthesized block. diff -uNrp icu-53.1.orig/source/test/testdata/regextst.txt icu-53.1/source/test/testdata/regextst.txt --- icu-53.1.orig/source/test/testdata/regextst.txt 2015-01-27 16:01:44.016129532 -0500 +++ icu-53.1/source/test/testdata/regextst.txt 2015-01-27 16:05:55.187199411 -0500 @@ -1172,6 +1172,23 @@ "(?<=a{1,})bc" E "aaaa<0>bc</0>def" # U_REGEX_LOOK_BEHIND_LIMIT error. "(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression. +# Bug 11369 +# Incorrect optimization of patterns with a zero length quantifier {0} + +"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE" +"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>" +"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>" +"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>" +"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>" + +# Bug 11370 +# Max match length computation of look-behind expression gives result that is too big to fit in the +# 24 bit operand portion of the compiled code. Expressions should fail to compile +# (Look-behind match length must be bounded. This case is treated as unbounded, an error.) 
+ +"(?<!(0123456789a){10000000})x" E "no match" +"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match" + # Random debugging, Temporary # @@ -1200,6 +1217,7 @@ "^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" G "<0>foo12@foo.edu</0>" "^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" G "<0>bob.smith@foo.tv</0>" "^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" "joe" + "^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" "@foo.com" "^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" "a@a" "^\d{1,2}\/\d{1,2}\/\d{4}$" G "<0>4/1/2001</0>"