Decide per string whether a value needs UTF-8 -> locale conversion.

newitem->name and newitem->comment are passed through utf8tolocalechar()
only when the individual string passes string_looks_utf8(); any other
value (NULL, plain ASCII, or text that is not well-formed UTF-8) is
stored unchanged.  string_looks_utf8() is the in-memory counterpart of
the existing file_looks_utf8().

--- gnome-libs-1.4.2/libgnome/gnome-dentry.c.utf8	2002-08-19 13:06:14.000000000 +0200
+++ gnome-libs-1.4.2/libgnome/gnome-dentry.c	2002-08-19 13:15:24.000000000 +0200
@@ -48,6 +48,9 @@
 	NULL
 };
 
+static char *utf8tolocalechar (const char *value);
+static gboolean string_looks_utf8 (const char *buf);
+
 /* g_free already checks if x is NULL */
 #define free_if_empty(x) g_free (x)
 
@@ -235,22 +238,21 @@
 
 #ifdef __GLIBC__
 	comment = gnome_config_get_translated_string ("Comment");
 
-	if (is_utf8) {
-		if (name) {
-			newitem->name = utf8tolocalechar (name);
-			g_free (name);
-		} else
-			newitem->name = NULL;
-		if (comment) {
-			newitem->comment = utf8tolocalechar (comment);
-			g_free (comment);
-		} else
-			newitem->comment = NULL;
+	if ((name) && (string_looks_utf8 (name))) {
+		newitem->name = utf8tolocalechar (name);
+		g_free (name);
+	} else {
+		newitem->name = name; /* NULL or not UTF-8: keep as-is, do not drop it */
+	}
+
+	if ((comment) && (string_looks_utf8 (comment))) {
+		newitem->comment = utf8tolocalechar (comment);
+		g_free (comment);
 	} else {
-		newitem->name = name;
-		newitem->comment = comment;
+		newitem->comment = comment;
 	}
+
 #else
 	newitem->name = name;
 	newitem->comment = gnome_config_get_translated_string ("Comment");
@@ -884,6 +886,7 @@
 }
 
 #ifdef __GLIBC__
+
 static gboolean
 get_contents_regfile (const gchar *filename,
 		      struct stat *stat_buf,
@@ -1011,6 +1014,86 @@
 };
 
+/*
+ * string_looks_utf8:
+ * @buf: NUL-terminated string to examine (must not be NULL; strlen()
+ *       is called on it).
+ *
+ * Returns TRUE only if every byte of @buf is either an ASCII byte that
+ * text_chars[] marks as T or part of a well-formed multi-byte sequence
+ * (lead byte 110xxxxx..1111110x followed by the matching number of
+ * 10xxxxxx bytes), and at least one multi-byte sequence was seen.
+ * Pure 7-bit strings therefore yield FALSE.
+ */
 static gboolean
+string_looks_utf8 (const char *buf)
+{
+	int i, n;
+	unsigned long c;
+	size_t nbytes;
+	gboolean gotone = FALSE;
+	gboolean result = TRUE;
+
+	nbytes = strlen(buf);
+
+	for (i = 0; result && i < nbytes; i++) {
+		if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
+			/*
+			 * Even if the whole string is valid UTF-8 sequences,
+			 * still reject it if it uses weird control characters.
+			 */
+
+			if (text_chars[((guchar *)buf)[i]] != T)
+				result = FALSE;
+		} else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
+			result = FALSE;
+		} else { /* 11xxxxxx begins UTF-8 */
+			int following;
+
+			if ((buf[i] & 0x20) == 0) { /* 110xxxxx */
+				c = buf[i] & 0x1f;
+				following = 1;
+			} else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */
+				c = buf[i] & 0x0f;
+				following = 2;
+			} else if ((buf[i] & 0x08) == 0) { /* 11110xxx */
+				c = buf[i] & 0x07;
+				following = 3;
+			} else if ((buf[i] & 0x04) == 0) { /* 111110xx */
+				c = buf[i] & 0x03;
+				following = 4;
+			} else if ((buf[i] & 0x02) == 0) { /* 1111110x */
+				c = buf[i] & 0x01;
+				following = 5;
+			} else {
+				result = FALSE;
+				goto done;
+			}
+
+			for (n = 0; n < following; n++) {
+				i++;
+				if (i >= nbytes) {
+					result = FALSE;
+					goto done;
+				}
+
+				if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) {
+					result = FALSE;
+					goto done;
+				}
+
+				c = (c << 6) + (buf[i] & 0x3f);
+			}
+
+			gotone = TRUE;
+		}
+	}
+done:
+
+	return result && gotone; /* don't claim it's UTF-8 if it's all 7-bit */
+}
+
+
+static gboolean
 file_looks_utf8 (const char *filename)
 {
 	int i, n;