Sophie

Sophie

distrib > * > 2008.0 > x86_64 > by-pkgid > fb1832787a7adf918aad2d840f64675b > files > 20

php-5.2.4-3.5mdv2008.0.src.rpm

http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.c?r1=1.111.2.2.2.14&r2=1.111.2.2.2.15&view=patch
--- old/ext/standard/html.c	2007/05/27 15:57:11	1.111.2.2.2.14
+++ new/ext/standard/html.c	2007/10/03 04:53:05	1.111.2.2.2.15
@@ -484,18 +484,29 @@
 			}                        \
 			mbseq[mbpos++] = (mbchar); }
 
+#define CHECK_LEN(pos, chars_need) do {	/* fail (*status = FAILURE, return 0) unless chars_need bytes remain at pos; uses the caller's str_len and status */ \
+	if ((str_len - (pos)) < (chars_need)) {	\
+		*status = FAILURE;					\
+		return 0;							\
+	} } while (0)
+
 /* {{{ get_next_char
  */
 inline static unsigned short get_next_char(enum entity_charset charset,
 		unsigned char * str,
+		int str_len,
 		int * newpos,
 		unsigned char * mbseq,
-		int * mbseqlen)
+		int * mbseqlen, 
+		int *status)
 {
 	int pos = *newpos;
 	int mbpos = 0;
 	int mbspace = *mbseqlen;
 	unsigned short this_char = str[pos++];
+	unsigned char next_char;
+
+	*status = SUCCESS;
 	
 	if (mbspace <= 0) {
 		*mbseqlen = 0;
@@ -517,6 +528,10 @@
 				do {
 					if (this_char < 0x80) {
 						more = 0;
+						if(stat) {
+							/* we didn't finish the UTF sequence correctly */
+							*status = FAILURE;
+						}
 						break;
 					} else if (this_char < 0xc0) {
 						switch (stat) {
@@ -555,6 +570,7 @@
 								break;
 							default:
 								/* invalid */
+								*status = FAILURE;
 								more = 0;
 						}
 					}
@@ -562,21 +578,27 @@
 					else if (this_char < 0xe0) {
 						stat = 0x10;	/* 2 byte */
 						utf = (this_char & 0x1f) << 6;
+						CHECK_LEN(pos, 1);
 					} else if (this_char < 0xf0) {
 						stat = 0x20;	/* 3 byte */
 						utf = (this_char & 0xf) << 12;
+						CHECK_LEN(pos, 2);
 					} else if (this_char < 0xf8) {
 						stat = 0x30;	/* 4 byte */
 						utf = (this_char & 0x7) << 18;
+						CHECK_LEN(pos, 3);
 					} else if (this_char < 0xfc) {
 						stat = 0x40;	/* 5 byte */
 						utf = (this_char & 0x3) << 24;
+						CHECK_LEN(pos, 4);
 					} else if (this_char < 0xfe) {
 						stat = 0x50;	/* 6 byte */
 						utf = (this_char & 0x1) << 30;
+						CHECK_LEN(pos, 5);
 					} else {
 						/* invalid; bail */
 						more = 0;
+						*status = FAILURE;
 						break;
 					}
 
@@ -594,7 +616,8 @@
 				/* check if this is the first of a 2-byte sequence */
 				if (this_char >= 0xa1 && this_char <= 0xfe) {
 					/* peek at the next char */
-					unsigned char next_char = str[pos];
+					CHECK_LEN(pos, 1);
+					next_char = str[pos];
 					if ((next_char >= 0x40 && next_char <= 0x7e) ||
 							(next_char >= 0xa1 && next_char <= 0xfe)) {
 						/* yes, this a wide char */
@@ -614,7 +637,8 @@
 					 (this_char >= 0xe0 && this_char <= 0xef)
 					) {
 					/* peek at the next char */
-					unsigned char next_char = str[pos];
+					CHECK_LEN(pos, 1);
+					next_char = str[pos];
 					if ((next_char >= 0x40 && next_char <= 0x7e) ||
 						(next_char >= 0x80 && next_char <= 0xfc))
 					{
@@ -633,7 +657,8 @@
 				/* check if this is the first of a multi-byte sequence */
 				if (this_char >= 0xa1 && this_char <= 0xfe) {
 					/* peek at the next char */
-					unsigned char next_char = str[pos];
+					CHECK_LEN(pos, 1);
+					next_char = str[pos];
 					if (next_char >= 0xa1 && next_char <= 0xfe) {
 						/* yes, this a jis kanji char */
 						this_char <<= 8;
@@ -644,7 +669,8 @@
 					
 				} else if (this_char == 0x8e) {
 					/* peek at the next char */
-					unsigned char next_char = str[pos];
+					CHECK_LEN(pos, 1);
+					next_char = str[pos];
 					if (next_char >= 0xa1 && next_char <= 0xdf) {
 						/* JIS X 0201 kana */
 						this_char <<= 8;
@@ -655,8 +681,10 @@
 					
 				} else if (this_char == 0x8f) {
 					/* peek at the next two char */
-					unsigned char next_char = str[pos];
-					unsigned char next2_char = str[pos+1];
+					unsigned char next2_char;
+					CHECK_LEN(pos, 2);
+					next_char = str[pos];
+					next2_char = str[pos+1];
 					if ((next_char >= 0xa1 && next_char <= 0xfe) &&
 						(next2_char >= 0xa1 && next2_char <= 0xfe)) {
 						/* JIS X 0212 hojo-kanji */
@@ -1098,13 +1126,22 @@
 		maxlen = 128;
 	replaced = emalloc (maxlen);
 	len = 0;
-
 	i = 0;
 	while (i < oldlen) {
 		unsigned char mbsequence[16];	/* allow up to 15 characters in a multibyte sequence */
 		int mbseqlen = sizeof(mbsequence);
-		unsigned short this_char = get_next_char(charset, old, &i, mbsequence, &mbseqlen);
+		int status = SUCCESS;
+		unsigned short this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status);
 
+		if(status == FAILURE) {
+			/* invalid MB sequence */
+			efree(replaced);
+			if(!PG(display_errors)) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument");
+			}
+			*newlen = 0;
+			return STR_EMPTY_ALLOC();
+		}
 		matches_map = 0;
 
 		if (len + 16 > maxlen)